github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/logexporter/cmd/main.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // TODO(shyamjvs): Make this exporter work for master too, currently facing
    18  // gcloud auth error when run from within a pod on the master.
    19  
    20  package main
    21  
    22  import (
    23  	"flag"
    24  	"fmt"
    25  	"io"
    26  	"io/ioutil"
    27  	"os"
    28  	"os/exec"
    29  	"path/filepath"
    30  	"time"
    31  
    32  	"github.com/golang/glog"
    33  )
    34  
    35  // Initialize the log exporter's configuration related flags.
    36  var (
    37  	nodeName             = flag.String("node-name", "", "Name of the node this log exporter is running on")
    38  	gcsPath              = flag.String("gcs-path", "", "Path to the GCS directory under which to upload logs, for eg: gs://my-logs-bucket/logs")
    39  	cloudProvider        = flag.String("cloud-provider", "", "Cloud provider for this node (gce/gke/aws/kubemark/..)")
    40  	gcloudAuthFilePath   = flag.String("gcloud-auth-file-path", "/etc/service-account/service-account.json", "Path to gcloud service account file, for authenticating gsutil to write to GCS bucket")
    41  	enableHollowNodeLogs = flag.Bool("enable-hollow-node-logs", false, "Enable uploading hollow node logs too. Relevant only for kubemark nodes")
    42  	sleepDuration        = flag.Duration("sleep-duration", 60*time.Second, "Duration to sleep before exiting with success. Useful for making pods schedule with hard anti-affinity when run as a job on a k8s cluster")
    43  )
    44  
    45  var (
    46  	localLogPath = "/var/log"
    47  
    48  	// Node-type specific logfiles.
    49  	// Currently we only handle nodes, and neglect master.
    50  	nodeLogs = []string{"kube-proxy", "node-problem-detector", "fluentd"}
    51  
    52  	// Cloud provider specific logfiles.
    53  	awsLogs      = []string{"cloud-init-output"}
    54  	gceLogs      = []string{"startupscript"}
    55  	kubemarkLogs = []string{}
    56  
    57  	// System services/kernel related logfiles.
    58  	kernelLog            = "kern"
    59  	initdLogs            = []string{"docker"}
    60  	supervisordLogs      = []string{"kubelet", "supervisor/supervisord", "supervisor/kubelet-stdout", "supervisor/kubelet-stderr", "supervisor/docker-stdout", "supervisor/docker-stderr"}
    61  	systemdServices      = []string{"kern", "kubelet", "docker"}
    62  	systemdSetupServices = []string{"kube-node-installation", "kube-node-configuration"}
    63  	nodeSystemdServices  = []string{"node-problem-detector"}
    64  )
    65  
    66  // Check if the config provided through the flags take valid values.
    67  func checkConfigValidity() error {
    68  	glog.Infof("Verifying if a valid config has been provided through the flags")
    69  	if *nodeName == "" {
    70  		return fmt.Errorf("Flag --node-name has its value unspecified")
    71  	}
    72  	if *gcsPath == "" {
    73  		return fmt.Errorf("Flag --gcs-path has its value unspecified")
    74  	}
    75  	if _, err := os.Stat(*gcloudAuthFilePath); err != nil {
    76  		return fmt.Errorf("Could not find the gcloud service account file: %v", err)
    77  	} else {
    78  		cmd := exec.Command("gcloud", "auth", "activate-service-account", "--key-file="+*gcloudAuthFilePath)
    79  		if err := cmd.Run(); err != nil {
    80  			return fmt.Errorf("Failed to activate gcloud service account: %v", err)
    81  		}
    82  	}
    83  	return nil
    84  }
    85  
    86  // Create logfile for systemd service in outputDir with the given journalctl outputMode.
    87  func createSystemdLogfile(service string, outputMode string, outputDir string) error {
    88  	// Generate the journalctl command.
    89  	journalCmdArgs := []string{fmt.Sprintf("--output=%v", outputMode), "-D", "/var/log/journal"}
    90  	if service == "kern" {
    91  		journalCmdArgs = append(journalCmdArgs, "-k")
    92  	} else {
    93  		journalCmdArgs = append(journalCmdArgs, "-u", fmt.Sprintf("%v.service", service))
    94  	}
    95  	cmd := exec.Command("journalctl", journalCmdArgs...)
    96  
    97  	// Run the command and record the output to a file.
    98  	output, err := cmd.Output()
    99  	if err != nil {
   100  		return fmt.Errorf("Journalctl command for '%v' service failed: %v", service, err)
   101  	}
   102  	logfile := filepath.Join(outputDir, service+".log")
   103  	if err := ioutil.WriteFile(logfile, output, 0444); err != nil {
   104  		return fmt.Errorf("Writing to file of journalctl logs for '%v' service failed: %v", service, err)
   105  	}
   106  	return nil
   107  }
   108  
   109  // Create logfiles for systemd services in outputDir.
   110  func createSystemdLogfiles(outputDir string) {
   111  	services := append(systemdServices, nodeSystemdServices...)
   112  	for _, service := range services {
   113  		if err := createSystemdLogfile(service, "cat", outputDir); err != nil {
   114  			glog.Warningf("Failed to record journalctl logs: %v", err)
   115  		}
   116  	}
   117  	// Service logs specific to VM setup.
   118  	for _, service := range systemdSetupServices {
   119  		if err := createSystemdLogfile(service, "short-precise", outputDir); err != nil {
   120  			glog.Warningf("Failed to record journalctl logs: %v", err)
   121  		}
   122  	}
   123  }
   124  
   125  // Copy logfiles specific to this node based on the cloud-provider, system services, etc
   126  // to a temporary directory. Also create logfiles for systemd services if journalctl is present.
   127  // We do not expect this function to see an error.
   128  func prepareLogfiles(logDir string) {
   129  	glog.Infof("Preparing logfiles relevant to this node")
   130  	logfiles := nodeLogs[:]
   131  
   132  	switch *cloudProvider {
   133  	case "gce", "gke":
   134  		logfiles = append(logfiles, gceLogs...)
   135  	case "kubemark":
   136  		// TODO(shyamjvs): Pick logs based on kubemark's real provider.
   137  		logfiles = append(logfiles, gceLogs...)
   138  		if *enableHollowNodeLogs {
   139  			logfiles = append(logfiles, kubemarkLogs...)
   140  		}
   141  	case "aws":
   142  		logfiles = append(logfiles, awsLogs...)
   143  	default:
   144  		glog.Errorf("Unknown cloud provider '%v' provided, skipping any provider specific logs", *cloudProvider)
   145  	}
   146  
   147  	// Select system/service specific logs.
   148  	if _, err := os.Stat("/workspace/etc/systemd/journald.conf"); err == nil {
   149  		glog.Infof("Journalctl found on host. Collecting systemd logs")
   150  		createSystemdLogfiles(logDir)
   151  	} else {
   152  		glog.Infof("Journalctl not found on host (%v). Collecting supervisord logs instead", err)
   153  		logfiles = append(logfiles, kernelLog)
   154  		logfiles = append(logfiles, initdLogs...)
   155  		logfiles = append(logfiles, supervisordLogs...)
   156  	}
   157  
   158  	// Copy all the logfiles that exist, to logDir.
   159  	for _, logfile := range logfiles {
   160  		logfileFullPath := filepath.Join(localLogPath, logfile+".log*") // Append .log* to copy rotated logs too.
   161  		cmd := exec.Command("/bin/sh", "-c", fmt.Sprintf("cp %v %v", logfileFullPath, logDir))
   162  		if err := cmd.Run(); err != nil {
   163  			glog.Warningf("Failed to copy any logfiles with pattern '%v': %v", logfileFullPath, err)
   164  		}
   165  	}
   166  }
   167  
   168  func uploadLogfilesToGCS(logDir string) error {
   169  	cmd := exec.Command("/bin/sh", "-c", fmt.Sprintf("ls %v/*", logDir))
   170  	if output, err := cmd.Output(); err != nil {
   171  		return fmt.Errorf("Could not list any logfiles: %v", err)
   172  	} else {
   173  		glog.Infof("List of logfiles available: %v", string(output))
   174  	}
   175  
   176  	gcsLogPath := *gcsPath + "/" + *nodeName
   177  	glog.Infof("Uploading logfiles to GCS at path '%v'", gcsLogPath)
   178  	var err error
   179  	for uploadAttempt := 0; uploadAttempt < 3; uploadAttempt++ {
   180  		// Upload the files with compression (-z) and parallelism (-m) for speeding
   181  		// up, and set their ACL to make them publicly readable.
   182  		cmd := exec.Command("gsutil", "-m", "-q", "cp", "-a", "public-read", "-c",
   183  			"-z", "log,txt,xml", logDir+"/*", gcsLogPath)
   184  		if err = cmd.Run(); err != nil {
   185  			glog.Errorf("Attempt %v to upload to GCS failed: %v", uploadAttempt, err)
   186  			continue
   187  		}
   188  		return writeSuccessMarkerFile()
   189  	}
   190  	return fmt.Errorf("Multiple attempts of gsutil failed, the final one due to: %v", err)
   191  }
   192  
   193  // Write a marker file to GCS named after this node to indicate logexporter's success.
   194  // The directory to which we write this file can then be used as a registry to quickly
   195  // fetch the list of nodes on which logexporter succeeded.
   196  func writeSuccessMarkerFile() error {
   197  	markerFilePath := *gcsPath + "/logexported-nodes-registry/" + *nodeName + ".txt"
   198  	cmd := exec.Command("gsutil", "-q", "cp", "-a", "public-read", "-", markerFilePath)
   199  	stdin, err := cmd.StdinPipe()
   200  	if err != nil {
   201  		return fmt.Errorf("Failed to get stdin pipe to write marker file: %v", err)
   202  	}
   203  	io.WriteString(stdin, "")
   204  	stdin.Close()
   205  	if err = cmd.Run(); err != nil {
   206  		return fmt.Errorf("Failed to write marker file to GCS: %v", err)
   207  	}
   208  	return nil
   209  }
   210  
   211  func main() {
   212  	flag.Parse()
   213  	if err := checkConfigValidity(); err != nil {
   214  		glog.Fatalf("Bad config provided: %v", err)
   215  	}
   216  
   217  	localTmpLogPath, err := ioutil.TempDir("/tmp", "k8s-systemd-logs")
   218  	if err != nil {
   219  		glog.Fatalf("Could not create temporary dir locally for copying logs: %v", err)
   220  	}
   221  	defer os.RemoveAll(localTmpLogPath)
   222  
   223  	prepareLogfiles(localTmpLogPath)
   224  	if err := uploadLogfilesToGCS(localTmpLogPath); err != nil {
   225  		glog.Fatalf("Could not upload logs to GCS: %v", err)
   226  	}
   227  	glog.Infof("Logs successfully uploaded")
   228  
   229  	glog.Infof("Entering sleep for a duration of %v seconds", *sleepDuration)
   230  	time.Sleep(*sleepDuration)
   231  }