github.com/honeycombio/honeytail@v1.9.0/main.go (about) 1 package main 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "math/rand" 8 "os" 9 "path/filepath" 10 "reflect" 11 "regexp" 12 "strings" 13 "time" 14 15 "github.com/honeycombio/libhoney-go" 16 flag "github.com/jessevdk/go-flags" 17 "github.com/sirupsen/logrus" 18 "gopkg.in/yaml.v3" 19 20 "github.com/honeycombio/honeytail/httime" 21 "github.com/honeycombio/honeytail/parsers/arangodb" 22 "github.com/honeycombio/honeytail/parsers/csv" 23 "github.com/honeycombio/honeytail/parsers/htjson" 24 "github.com/honeycombio/honeytail/parsers/keyval" 25 "github.com/honeycombio/honeytail/parsers/mongodb" 26 "github.com/honeycombio/honeytail/parsers/mysql" 27 "github.com/honeycombio/honeytail/parsers/nginx" 28 "github.com/honeycombio/honeytail/parsers/postgresql" 29 "github.com/honeycombio/honeytail/parsers/regex" 30 "github.com/honeycombio/honeytail/parsers/syslog" 31 "github.com/honeycombio/honeytail/tail" 32 ) 33 34 // BuildID is set by Travis CI 35 var BuildID string 36 37 // internal version identifier 38 var version string 39 40 var validParsers = []string{ 41 "arangodb", 42 "csv", 43 "json", 44 "keyval", 45 "mongo", 46 "mysql", 47 "nginx", 48 "postgresql", 49 "regex", 50 "syslog", 51 } 52 53 // GlobalOptions has all the top level CLI flags that honeytail supports 54 type GlobalOptions struct { 55 APIHost string `long:"api_host" description:"Host for the Honeycomb API" default:"https://api.honeycomb.io/"` 56 TailSample bool `hidden:"true" description:"When true, sample while tailing. When false, sample post-parser events" yaml:"-"` 57 58 ConfigFile string `short:"c" long:"config" description:"Config file for honeytail in INI format." no-ini:"true" yaml:"-"` 59 ConfigYaml string `long:"config_yaml" description:"Config file for honeytail in YAML format." yaml:"-"` 60 WriteYaml string `long:"write_yaml" description:"When specified (a filename), parse the existing config, then write a new YAML config to the specified YAML file and quit." yaml:"-"` 61 62 SampleRate uint `short:"r" long:"samplerate" description:"Only send 1 / N log lines" default:"1" yaml:"samplerate"` 63 NumSenders uint `short:"P" long:"poolsize" description:"Number of concurrent connections to open to Honeycomb" default:"80" yaml:"poolsize"` 64 BatchFrequencyMs uint `long:"send_frequency_ms" description:"How frequently to flush batches" default:"100" yaml:"send_frequency_ms"` 65 BatchSize uint `long:"send_batch_size" description:"Maximum number of messages to put in a batch" default:"50" yaml:"send_batch_size"` 66 Debug bool `long:"debug" description:"Print debugging output" yaml:"debug,omitempty"` 67 DebugOut bool `long:"debug_stdout" description:"Instead of sending events to Honeycomb, print them to STDOUT for debugging" yaml:"debug_stdout,omitempty"` 68 StatusInterval uint `long:"status_interval" description:"How frequently, in seconds, to print out summary info" default:"60" yaml:"status_interval"` 69 Backfill bool `long:"backfill" description:"Configure honeytail to ingest old data in order to backfill Honeycomb. Sets the correct values for --backoff, --tail.read_from, and --tail.stop" yaml:"backfill,omitempty"` 70 RebaseTime bool `long:"rebase_time" description:"When backfilling data, rebase timestamps relative to the current time." yaml:"rebase_time,omitempty"` 71 72 Localtime bool `long:"localtime" description:"When parsing a timestamp that has no time zone, assume it is in the same timezone as localhost instead of UTC (the default)" yaml:"localtime,omitempty"` 73 Timezone string `long:"timezone" description:"When parsing a timestamp use this time zone instead of UTC (the default). Must be specified in TZ format as seen here: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones" yaml:"timezone,omitempty"` 74 ScrubFields []string `long:"scrub_field" description:"For the field listed, apply a one-way hash to the field content. May have multiple values." yaml:"scrub_field,omitempty"` 75 DropFields []string `long:"drop_field" description:"Do not send the field to Honeycomb. May have multiple values." yaml:"drop_field,omitempty"` 76 AddFields []string `long:"add_field" description:"Add the field to every event. Field should be key=val. May have multiple values." yaml:"add_field,omitempty"` 77 DAMapFile string `long:"da_map_file" description:"Data Augmentation Map file. Path to a file that contains JSON mapping of columns to augment, the values of the column, and new objects to be inserted into the event, eg to add hostname based on IP address or username based on user ID." yaml:"da_map_file,omitempty"` 78 RequestShape []string `long:"request_shape" description:"Identify a field that contains an HTTP request of the form 'METHOD /path HTTP/1.x' or just the request path. Break apart that field into subfields that contain components. May have multiple values. Defaults to 'request' when using the nginx parser." yaml:"request_shape,omitempty"` 79 ShapePrefix string `long:"shape_prefix" description:"Prefix to use on fields generated from request_shape to prevent field collision" yaml:"shape_prefix,omitempty"` 80 RequestPattern []string `long:"request_pattern" description:"A pattern for the request path on which to base the derived request_shape. May have multiple values. Patterns are considered in order; first match wins." yaml:"request_pattern,omitempty"` 81 RequestParseQuery string `long:"request_parse_query" description:"How to parse the request query parameters. 'whitelist' means only extract listed query keys. 'all' means to extract all query parameters as individual columns" default:"whitelist" yaml:"request_parse_query"` 82 RequestQueryKeys []string `long:"request_query_keys" description:"Request query parameter key names to extract, when request_parse_query is 'whitelist'. May have multiple values." yaml:"request_query_keys,omitempty"` 83 BackOff bool `long:"backoff" description:"When rate limited by the API, back off and retry sending failed events. Otherwise failed events are dropped. When --backfill is set, it will force this to true." yaml:"backoff,omitempty"` 84 PrefixRegex string `long:"log_prefix" description:"pass a regex to this flag to strip the matching prefix from the line before handing to the parser. Useful when log aggregation prepends a line header. Use named groups to extract fields into the event." yaml:"log_prefix,omitempty"` 85 DeterministicSample string `long:"deterministic_sampling" description:"Specify a field to deterministically sample on, i.e., every concurrent Honeytail instance will sample 1/N based on content." yaml:"deterministic_sampling,omitempty"` 86 DynSample []string `long:"dynsampling" description:"Enable dynamic sampling using the field listed in this option. May have multiple values; fields will be concatenated to form the dynsample key. WARNING: increases CPU utilization dramatically over normal sampling." yaml:"dynsampling,omitempty"` 87 DynWindowSec int `long:"dynsample_window" description:"Set measurement window size for the dynsampler, in seconds." default:"30" yaml:"dynsample_window"` 88 PreSampledField string `long:"presampled" description:"If this log has already been sampled, specify the field containing the sample rate here and it will be passed along unchanged." yaml:"presampled,omitempty"` 89 GoalSampleRate int `hidden:"true" description:"Used to hold the desired sample rate and set tailing sample rate to 1." yaml:"-"` 90 MinSampleRate int `long:"dynsample_minimum" description:"If the rate of traffic falls below this, dynsampler won't sample." default:"1" yaml:"dynsample_minimum"` 91 JSONFields []string `long:"json_field" description:"JSON fields encoded as string to unescape and properly parse before sending. May have multiple values." yaml:"json_field,omitempty"` 92 FilterFiles []string `short:"F" long:"filter-file" description:"Log file(s) to exclude from --file glob. May have multiple values, including multiple globs." yaml:"filter-file,omitempty"` 93 RenameFields []string `long:"rename_field" description:"Format: 'before=after'. Rename field called 'before' from parsed lines to field name 'after' in Honeycomb events. May have multiple values." yaml:"rename_field,omitempty"` 94 95 LogLevel string `long:"log_level" description:"Set the log level. Valid values are 'debug', 'info', 'warn', 'error', 'fatal', 'panic'." default:"info" yaml:"log_level,omitempty"` 96 97 Reqs RequiredOptions `group:"Required Options" yaml:"required_options,omitempty"` 98 Modes OtherModes `group:"Other Modes" yaml:"-"` 99 100 Tail tail.TailOptions `group:"Tail Options" namespace:"tail" yaml:",omitempty"` 101 102 ArangoDB arangodb.Options `group:"ArangoDB Parser Options" namespace:"arangodb" yaml:",omitempty"` 103 CSV csv.Options `group:"CSV Parser Options" namespace:"csv" yaml:",omitempty"` 104 JSON htjson.Options `group:"JSON Parser Options" namespace:"json" yaml:",omitempty"` 105 KeyVal keyval.Options `group:"KeyVal Parser Options" namespace:"keyval" yaml:",omitempty"` 106 Mongo mongodb.Options `group:"MongoDB Parser Options" namespace:"mongo" yaml:",omitempty"` 107 MySQL mysql.Options `group:"MySQL Parser Options" namespace:"mysql" yaml:",omitempty"` 108 Nginx nginx.Options `group:"Nginx Parser Options" namespace:"nginx" yaml:",omitempty"` 109 PostgreSQL postgresql.Options `group:"PostgreSQL Parser Options" namespace:"postgresql" yaml:",omitempty"` 110 Regex regex.Options `group:"Regex Parser Options" namespace:"regex" yaml:",omitempty"` 111 Syslog syslog.Options `group:"Syslog Parser Options" namespace:"syslog" yaml:",omitempty"` 112 } 113 114 type RequiredOptions struct { 115 ParserName string `short:"p" long:"parser" description:"Parser module to use. Use --list to list available options."` 116 WriteKey string `short:"k" long:"writekey" description:"Team write key"` 117 LogFiles []string `short:"f" long:"file" description:"Log file(s) to parse. Use '-' for STDIN, use this flag multiple times to tail multiple files, or use a glob (/path/to/foo-*.log)"` 118 Dataset string `short:"d" long:"dataset" description:"Name of the dataset"` 119 } 120 121 type OtherModes struct { 122 Help bool `short:"h" long:"help" description:"Show this help message."` 123 ListParsers bool `short:"l" long:"list" description:"List available parsers."` 124 Version bool `short:"V" long:"version" description:"Show version."` 125 WriteDefaultConfig bool `long:"write_default_config" description:"Write a default config file to STDOUT." no-ini:"true"` 126 WriteCurrentConfig bool `long:"write_current_config" description:"Write out the current config to STDOUT." no-ini:"true"` 127 WriteCurrentYaml bool `long:"write_current_yaml" description:"Write out the current config to STDOUT as YAML." no-ini:"true"` 128 129 WriteManPage bool `hidden:"true" long:"write-man-page" description:"Write out a man page."` 130 } 131 132 func main() { 133 var options GlobalOptions 134 flagParser := flag.NewParser(&options, flag.PrintErrors) 135 flagParser.Usage = `-p <parser> -k <writekey> -f </path/to/logfile> -d <mydata> [optional arguments] 136 137 See https://honeycomb.io/docs/connect/agent/ for more detailed usage instructions.` 138 139 if extraArgs, err := flagParser.Parse(); err != nil || len(extraArgs) != 0 { 140 fmt.Println("Error: failed to parse the command line.") 141 if err != nil { 142 fmt.Printf("\t%s\n", err) 143 } else { 144 fmt.Printf("\tUnexpected extra arguments: %s\n", strings.Join(extraArgs, " ")) 145 } 146 usage() 147 os.Exit(1) 148 } 149 // read the config file if present 150 if options.ConfigFile != "" { 151 ini := flag.NewIniParser(flagParser) 152 ini.ParseAsDefaults = true 153 if err := ini.ParseFile(options.ConfigFile); err != nil { 154 fmt.Printf("Error: failed to parse the config file %s\n", options.ConfigFile) 155 fmt.Printf("\t%s\n", err) 156 usage() 157 os.Exit(1) 158 } 159 } 160 161 if options.ConfigYaml != "" { 162 f, err := os.Open(options.ConfigYaml) 163 if err != nil { 164 fmt.Println("error opening " + options.ConfigYaml) 165 os.Exit(1) 166 } 167 b, err := io.ReadAll(f) 168 if err != nil { 169 fmt.Println("error reading " + options.ConfigYaml) 170 os.Exit(1) 171 } 172 yaml.Unmarshal(b, &options) 173 } 174 175 rand.Seed(time.Now().UnixNano()) 176 177 if options.Debug { 178 logrus.SetLevel(logrus.DebugLevel) 179 } else if options.LogLevel != "" { 180 level, err := logrus.ParseLevel(options.LogLevel) 181 if err != nil { 182 fmt.Printf("invalid log level: %s\n", options.LogLevel) 183 os.Exit(1) 184 } 185 logrus.SetLevel(level) 186 } 187 188 // Support flag alias: --backfill should cover --backoff --tail.read_from=beginning --tail.stop 189 if options.Backfill { 190 options.BackOff = true 191 options.Tail.ReadFrom = "beginning" 192 options.Tail.Stop = true 193 } 194 195 // set time zone info 196 if options.Localtime { 197 httime.Location = time.Now().Location() 198 } 199 if options.Timezone != "" { 200 loc, err := time.LoadLocation(options.Timezone) 201 if err != nil { 202 fmt.Printf("time zone '%s' not successfully parsed.\n", options.Timezone) 203 fmt.Printf("see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones for a list of time zones\n") 204 fmt.Printf("expected format example: America/Los_Angeles\n") 205 fmt.Printf("Specific error: %s\n", err.Error()) 206 os.Exit(1) 207 } 208 httime.Location = loc 209 } 210 211 setVersionUserAgent(options.Backfill, options.Reqs.ParserName) 212 handleOtherModes(flagParser, options) 213 addParserDefaultOptions(&options) 214 sanityCheckOptions(&options) 215 216 if !options.DebugOut { 217 if _, err := libhoney.VerifyAPIKey(libhoney.Config{ 218 APIHost: options.APIHost, 219 WriteKey: options.Reqs.WriteKey, 220 }); err != nil { 221 fmt.Fprintln(os.Stderr, "Could not verify Honeycomb write key: ", err) 222 os.Exit(1) 223 } 224 } else { 225 logrus.Debug("skipping Honeycomb write key verification, because --debug_stdout is set...") 226 } 227 228 logrus.Debug("parsed arguments: ", structToString(options)) 229 230 run(context.Background(), options) 231 } 232 233 // convert options struct to a comma separated list of key=value pairs (for debugging) 234 func structToString(s interface{}) string { 235 v := reflect.ValueOf(s) 236 t := v.Type() 237 fields := make([]string, v.NumField()) 238 239 isExported := func(s string) bool { 240 f := s[0:1] 241 return f == strings.ToUpper(f) 242 } 243 244 for i := 0; i < v.NumField(); i++ { 245 if v.Field(i).Kind() == reflect.Struct { 246 fields[i] = structToString(v.Field(i).Interface()) 247 } else { 248 name := t.Field(i).Name 249 if isExported(name) { 250 value := v.Field(i).Interface() 251 if name == "WriteKey" { 252 value = "[REDACTED]" 253 } 254 fields[i] = fmt.Sprintf("%s.%s=%v", t, name, value) 255 } 256 } 257 } 258 return strings.Join(fields, ",") 259 } 260 261 // setVersion sets the internal version ID and updates libhoney's user-agent 262 func setVersionUserAgent(backfill bool, parserName string) { 263 if BuildID == "" { 264 version = "dev" 265 } else { 266 version = BuildID 267 } 268 if backfill { 269 parserName += " backfill" 270 } 271 libhoney.UserAgentAddition = fmt.Sprintf("honeytail/%s (%s)", version, parserName) 272 } 273 274 // handleOtherModes takes care of all flags that say we should just do something 275 // and exit rather than actually parsing logs 276 func handleOtherModes(fp *flag.Parser, options GlobalOptions) { 277 modes := options.Modes 278 if modes.Version { 279 fmt.Println("Honeytail version", version) 280 os.Exit(0) 281 } 282 if modes.Help { 283 fp.WriteHelp(os.Stdout) 284 fmt.Println("") 285 os.Exit(0) 286 } 287 if modes.WriteManPage { 288 fp.WriteManPage(os.Stdout) 289 os.Exit(0) 290 } 291 if modes.WriteDefaultConfig { 292 ip := flag.NewIniParser(fp) 293 ip.Write(os.Stdout, flag.IniIncludeDefaults|flag.IniCommentDefaults|flag.IniIncludeComments) 294 os.Exit(0) 295 } 296 if modes.WriteCurrentConfig { 297 ip := flag.NewIniParser(fp) 298 ip.Write(os.Stdout, flag.IniIncludeComments) 299 os.Exit(0) 300 } 301 if modes.WriteCurrentYaml { 302 y, err := yaml.Marshal(options) 303 if err != nil { 304 fmt.Println("unable to marshal options to YAML!") 305 os.Exit(1) 306 } 307 os.Stdout.Write(y) 308 os.Exit(0) 309 } 310 311 if modes.ListParsers { 312 fmt.Println("Available parsers:", strings.Join(validParsers, ", ")) 313 os.Exit(0) 314 } 315 } 316 317 func addParserDefaultOptions(options *GlobalOptions) { 318 switch { 319 case options.Reqs.ParserName == "nginx": 320 // automatically normalize the request when using the nginx parser 321 options.RequestShape = append(options.RequestShape, "request") 322 } 323 if options.Reqs.ParserName != "mysql" { 324 // mysql is the only parser that requires in-parser sampling because it has 325 // a multi-line log format. 326 // Sample all other parser when tailing to conserve CPU 327 options.TailSample = true 328 } else { 329 options.TailSample = false 330 } 331 if options.DeterministicSample != "" { 332 options.TailSample = false 333 } 334 if len(options.DynSample) != 0 { 335 // when using dynamic sampling, we make the sampling decision after parsing 336 // the content, so we must not tailsample. 337 options.TailSample = false 338 options.GoalSampleRate = int(options.SampleRate) 339 options.SampleRate = 1 340 } 341 } 342 343 func sanityCheckOptions(options *GlobalOptions) { 344 switch { 345 case options.Reqs.ParserName == "": 346 fmt.Println("Parser required to be specified with the --parser flag.") 347 usage() 348 os.Exit(1) 349 case (options.Reqs.WriteKey == "" || options.Reqs.WriteKey == "NULL") && (!options.DebugOut): 350 fmt.Println("Write key required to be specified with the --writekey flag.") 351 usage() 352 os.Exit(1) 353 case len(options.Reqs.LogFiles) == 0: 354 fmt.Println("Log file name or '-' required to be specified with the --file flag.") 355 usage() 356 os.Exit(1) 357 case options.Reqs.Dataset == "": 358 fmt.Println("Dataset name required with the --dataset flag.") 359 usage() 360 os.Exit(1) 361 case options.SampleRate == 0: 362 fmt.Println("Sample rate must be an integer >= 1") 363 usage() 364 os.Exit(1) 365 case options.Tail.ReadFrom == "end" && options.Tail.Stop: 366 fmt.Println("Reading from the end and stopping when we get there. Zero lines to process. Ok, all done! ;)") 367 usage() 368 os.Exit(1) 369 case options.RequestParseQuery != "whitelist" && options.RequestParseQuery != "all": 370 fmt.Println("request_parse_query flag must be either 'whitelist' or 'all'.") 371 usage() 372 os.Exit(1) 373 case (len(options.DynSample) != 0 || options.DeterministicSample != "") && options.SampleRate <= 1 && options.GoalSampleRate <= 1: 374 fmt.Println("sample rate flag must be set >= 2 when dynamic or deterministic sampling is enabled") 375 usage() 376 os.Exit(1) 377 case len(options.DynSample) != 0 && options.DeterministicSample != "": 378 fmt.Println("dynamic sampling and deterministic sampling cannot be used together") 379 usage() 380 os.Exit(1) 381 case options.DeterministicSample != "" && options.PreSampledField != "": 382 fmt.Println("deterministic sampling and a presampled field cannot be used together") 383 usage() 384 os.Exit(1) 385 case options.SampleRate != 1 && options.PreSampledField != "": 386 fmt.Println("sampling and a presampled field cannot be used together") 387 usage() 388 os.Exit(1) 389 } 390 391 // check the prefix regex for validity 392 if options.PrefixRegex != "" { 393 // make sure the regex is anchored against the start of the string 394 if options.PrefixRegex[0] != '^' { 395 options.PrefixRegex = "^" + options.PrefixRegex 396 } 397 // make sure it's valid 398 _, err := regexp.Compile(options.PrefixRegex) 399 if err != nil { 400 fmt.Printf("Prefix regex %s doesn't compile: error %s\n", options.PrefixRegex, err) 401 usage() 402 os.Exit(1) 403 } 404 } 405 406 // Make sure input files exist 407 shouldExit := false 408 for _, f := range options.Reqs.LogFiles { 409 if f == "-" { 410 continue 411 } 412 if files, err := filepath.Glob(f); err != nil || files == nil { 413 fmt.Printf("Log file specified by --file=%s not found!\n", f) 414 shouldExit = true 415 } 416 } 417 if shouldExit { 418 usage() 419 os.Exit(1) 420 } 421 } 422 423 func usage() { 424 fmt.Print(` 425 Usage: honeytail -p <parser> -k <writekey> -f </path/to/logfile> -d <mydata> [optional arguments] 426 427 For even more detail on required and optional parameters, run 428 honeytail --help 429 `) 430 }