github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/logexporter/cmd/main.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // TODO(shyamjvs): Make this exporter work for master too, currently facing 18 // gcloud auth error when run from within a pod on the master. 19 20 package main 21 22 import ( 23 "flag" 24 "fmt" 25 "io" 26 "io/ioutil" 27 "os" 28 "os/exec" 29 "path/filepath" 30 "time" 31 32 "github.com/golang/glog" 33 ) 34 35 // Initialize the log exporter's configuration related flags. 36 var ( 37 nodeName = flag.String("node-name", "", "Name of the node this log exporter is running on") 38 gcsPath = flag.String("gcs-path", "", "Path to the GCS directory under which to upload logs, for eg: gs://my-logs-bucket/logs") 39 cloudProvider = flag.String("cloud-provider", "", "Cloud provider for this node (gce/gke/aws/kubemark/..)") 40 gcloudAuthFilePath = flag.String("gcloud-auth-file-path", "/etc/service-account/service-account.json", "Path to gcloud service account file, for authenticating gsutil to write to GCS bucket") 41 enableHollowNodeLogs = flag.Bool("enable-hollow-node-logs", false, "Enable uploading hollow node logs too. Relevant only for kubemark nodes") 42 sleepDuration = flag.Duration("sleep-duration", 60*time.Second, "Duration to sleep before exiting with success. Useful for making pods schedule with hard anti-affinity when run as a job on a k8s cluster") 43 ) 44 45 var ( 46 localLogPath = "/var/log" 47 48 // Node-type specific logfiles. 49 // Currently we only handle nodes, and neglect master. 50 nodeLogs = []string{"kube-proxy", "node-problem-detector", "fluentd"} 51 52 // Cloud provider specific logfiles. 53 awsLogs = []string{"cloud-init-output"} 54 gceLogs = []string{"startupscript"} 55 kubemarkLogs = []string{"*-hollow-node-*"} 56 57 // System services/kernel related logfiles. 58 kernelLog = "kern" 59 initdLogs = []string{"docker"} 60 supervisordLogs = []string{"kubelet", "supervisor/supervisord", "supervisor/kubelet-stdout", "supervisor/kubelet-stderr", "supervisor/docker-stdout", "supervisor/docker-stderr"} 61 systemdServices = []string{"kern", "kubelet", "docker"} 62 systemdSetupServices = []string{"kube-node-installation", "kube-node-configuration"} 63 nodeSystemdServices = []string{"node-problem-detector"} 64 ) 65 66 // Check if the config provided through the flags take valid values. 67 func checkConfigValidity() error { 68 glog.Info("Verifying if a valid config has been provided through the flags") 69 if *nodeName == "" { 70 return fmt.Errorf("Flag --node-name has its value unspecified") 71 } 72 if *gcsPath == "" { 73 return fmt.Errorf("Flag --gcs-path has its value unspecified") 74 } 75 if _, err := os.Stat(*gcloudAuthFilePath); err != nil { 76 return fmt.Errorf("Could not find the gcloud service account file: %v", err) 77 } else { 78 cmd := exec.Command("gcloud", "auth", "activate-service-account", "--key-file="+*gcloudAuthFilePath) 79 if err := cmd.Run(); err != nil { 80 return fmt.Errorf("Failed to activate gcloud service account: %v", err) 81 } 82 } 83 return nil 84 } 85 86 // Create logfile for systemd service in outputDir with the given journalctl outputMode. 87 func createSystemdLogfile(service string, outputMode string, outputDir string) error { 88 // Generate the journalctl command. 89 journalCmdArgs := []string{fmt.Sprintf("--output=%v", outputMode), "-D", "/var/log/journal"} 90 if service == "kern" { 91 journalCmdArgs = append(journalCmdArgs, "-k") 92 } else { 93 journalCmdArgs = append(journalCmdArgs, "-u", fmt.Sprintf("%v.service", service)) 94 } 95 cmd := exec.Command("journalctl", journalCmdArgs...) 96 97 // Run the command and record the output to a file. 98 output, err := cmd.Output() 99 if err != nil { 100 return fmt.Errorf("Journalctl command for '%v' service failed: %v", service, err) 101 } 102 logfile := filepath.Join(outputDir, service+".log") 103 if err := ioutil.WriteFile(logfile, output, 0444); err != nil { 104 return fmt.Errorf("Writing to file of journalctl logs for '%v' service failed: %v", service, err) 105 } 106 return nil 107 } 108 109 // Create logfiles for systemd services in outputDir. 110 func createSystemdLogfiles(outputDir string) { 111 services := append(systemdServices, nodeSystemdServices...) 112 for _, service := range services { 113 if err := createSystemdLogfile(service, "cat", outputDir); err != nil { 114 glog.Warningf("Failed to record journalctl logs: %v", err) 115 } 116 } 117 // Service logs specific to VM setup. 118 for _, service := range systemdSetupServices { 119 if err := createSystemdLogfile(service, "short-precise", outputDir); err != nil { 120 glog.Warningf("Failed to record journalctl logs: %v", err) 121 } 122 } 123 } 124 125 // Copy logfiles specific to this node based on the cloud-provider, system services, etc 126 // to a temporary directory. Also create logfiles for systemd services if journalctl is present. 127 // We do not expect this function to see an error. 128 func prepareLogfiles(logDir string) { 129 glog.Info("Preparing logfiles relevant to this node") 130 logfiles := nodeLogs[:] 131 132 switch *cloudProvider { 133 case "gce", "gke": 134 logfiles = append(logfiles, gceLogs...) 135 case "aws": 136 logfiles = append(logfiles, awsLogs...) 137 default: 138 glog.Errorf("Unknown cloud provider '%v' provided, skipping any provider specific logs", *cloudProvider) 139 } 140 141 // Grab kubemark logs too, if asked for. 142 if *enableHollowNodeLogs { 143 logfiles = append(logfiles, kubemarkLogs...) 144 } 145 146 // Select system/service specific logs. 147 if _, err := os.Stat("/workspace/etc/systemd/journald.conf"); err == nil { 148 glog.Info("Journalctl found on host. Collecting systemd logs") 149 createSystemdLogfiles(logDir) 150 } else { 151 glog.Infof("Journalctl not found on host (%v). Collecting supervisord logs instead", err) 152 logfiles = append(logfiles, kernelLog) 153 logfiles = append(logfiles, initdLogs...) 154 logfiles = append(logfiles, supervisordLogs...) 155 } 156 157 // Copy all the logfiles that exist, to logDir. 158 for _, logfile := range logfiles { 159 logfileFullPath := filepath.Join(localLogPath, logfile+".log*") // Append .log* to copy rotated logs too. 160 cmd := exec.Command("/bin/sh", "-c", fmt.Sprintf("cp %v %v", logfileFullPath, logDir)) 161 if err := cmd.Run(); err != nil { 162 glog.Warningf("Failed to copy any logfiles with pattern '%v': %v", logfileFullPath, err) 163 } 164 } 165 } 166 167 func uploadLogfilesToGCS(logDir string) error { 168 cmd := exec.Command("/bin/sh", "-c", fmt.Sprintf("ls %v/*", logDir)) 169 if output, err := cmd.Output(); err != nil { 170 return fmt.Errorf("Could not list any logfiles: %v", err) 171 } else { 172 glog.Infof("List of logfiles available: %v", string(output)) 173 } 174 175 gcsLogPath := *gcsPath + "/" + *nodeName 176 glog.Infof("Uploading logfiles to GCS at path '%v'", gcsLogPath) 177 var err error 178 for uploadAttempt := 0; uploadAttempt < 3; uploadAttempt++ { 179 // Upload the files with compression (-z) and parallelism (-m) for speeding 180 // up, and set their ACL to make them publicly readable. 181 cmd := exec.Command("gsutil", "-m", "-q", "cp", "-a", "public-read", "-c", 182 "-z", "log,txt,xml", logDir+"/*", gcsLogPath) 183 if err = cmd.Run(); err != nil { 184 glog.Errorf("Attempt %v to upload to GCS failed: %v", uploadAttempt, err) 185 continue 186 } 187 return writeSuccessMarkerFile() 188 } 189 return fmt.Errorf("Multiple attempts of gsutil failed, the final one due to: %v", err) 190 } 191 192 // Write a marker file to GCS named after this node to indicate logexporter's success. 193 // The directory to which we write this file can then be used as a registry to quickly 194 // fetch the list of nodes on which logexporter succeeded. 195 func writeSuccessMarkerFile() error { 196 markerFilePath := *gcsPath + "/logexported-nodes-registry/" + *nodeName + ".txt" 197 cmd := exec.Command("gsutil", "-q", "cp", "-a", "public-read", "-", markerFilePath) 198 stdin, err := cmd.StdinPipe() 199 if err != nil { 200 return fmt.Errorf("Failed to get stdin pipe to write marker file: %v", err) 201 } 202 io.WriteString(stdin, "") 203 stdin.Close() 204 if err = cmd.Run(); err != nil { 205 return fmt.Errorf("Failed to write marker file to GCS: %v", err) 206 } 207 return nil 208 } 209 210 func main() { 211 flag.Parse() 212 if err := checkConfigValidity(); err != nil { 213 glog.Fatalf("Bad config provided: %v", err) 214 } 215 216 localTmpLogPath, err := ioutil.TempDir("/tmp", "k8s-systemd-logs") 217 if err != nil { 218 glog.Fatalf("Could not create temporary dir locally for copying logs: %v", err) 219 } 220 defer os.RemoveAll(localTmpLogPath) 221 222 prepareLogfiles(localTmpLogPath) 223 if err := uploadLogfilesToGCS(localTmpLogPath); err != nil { 224 glog.Fatalf("Could not upload logs to GCS: %v", err) 225 } 226 glog.Info("Logs successfully uploaded") 227 228 glog.Infof("Entering sleep for a duration of %v seconds", *sleepDuration) 229 time.Sleep(*sleepDuration) 230 }