github.com/honeycombio/honeytail@v1.9.0/main.go (about)

     1  package main
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"math/rand"
     8  	"os"
     9  	"path/filepath"
    10  	"reflect"
    11  	"regexp"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/honeycombio/libhoney-go"
    16  	flag "github.com/jessevdk/go-flags"
    17  	"github.com/sirupsen/logrus"
    18  	"gopkg.in/yaml.v3"
    19  
    20  	"github.com/honeycombio/honeytail/httime"
    21  	"github.com/honeycombio/honeytail/parsers/arangodb"
    22  	"github.com/honeycombio/honeytail/parsers/csv"
    23  	"github.com/honeycombio/honeytail/parsers/htjson"
    24  	"github.com/honeycombio/honeytail/parsers/keyval"
    25  	"github.com/honeycombio/honeytail/parsers/mongodb"
    26  	"github.com/honeycombio/honeytail/parsers/mysql"
    27  	"github.com/honeycombio/honeytail/parsers/nginx"
    28  	"github.com/honeycombio/honeytail/parsers/postgresql"
    29  	"github.com/honeycombio/honeytail/parsers/regex"
    30  	"github.com/honeycombio/honeytail/parsers/syslog"
    31  	"github.com/honeycombio/honeytail/tail"
    32  )
    33  
    34  // BuildID is set by Travis CI
    35  var BuildID string
    36  
    37  // internal version identifier
    38  var version string
    39  
    40  var validParsers = []string{
    41  	"arangodb",
    42  	"csv",
    43  	"json",
    44  	"keyval",
    45  	"mongo",
    46  	"mysql",
    47  	"nginx",
    48  	"postgresql",
    49  	"regex",
    50  	"syslog",
    51  }
    52  
// GlobalOptions has all the top level CLI flags that honeytail supports.
//
// main hands a pointer to this struct to the go-flags parser, so the `short`,
// `long`, `description` and `default` tags define the command line (and INI
// file) surface. The `yaml` tags control how the same struct is read from
// --config_yaml and written by --write_current_yaml; fields tagged yaml:"-"
// are left out of the YAML form.
type GlobalOptions struct {
	// TailSample has no flag name and is hidden; addParserDefaultOptions
	// computes it from the selected parser and sampling mode.
	APIHost    string `long:"api_host" description:"Host for the Honeycomb API" default:"https://api.honeycomb.io/"`
	TailSample bool   `hidden:"true" description:"When true, sample while tailing. When false, sample post-parser events" yaml:"-"`

	// Config file inputs and outputs. None of these are written to YAML.
	ConfigFile string `short:"c" long:"config" description:"Config file for honeytail in INI format." no-ini:"true" yaml:"-"`
	ConfigYaml string `long:"config_yaml" description:"Config file for honeytail in YAML format." yaml:"-"`
	WriteYaml  string `long:"write_yaml" description:"When specified (a filename), parse the existing config, then write a new YAML config to the specified YAML file and quit." yaml:"-"`

	// Sending, basic sampling, debugging and backfill switches. main expands
	// Backfill into BackOff, Tail.ReadFrom="beginning" and Tail.Stop.
	SampleRate       uint `short:"r" long:"samplerate" description:"Only send 1 / N log lines" default:"1" yaml:"samplerate"`
	NumSenders       uint `short:"P" long:"poolsize" description:"Number of concurrent connections to open to Honeycomb" default:"80" yaml:"poolsize"`
	BatchFrequencyMs uint `long:"send_frequency_ms" description:"How frequently to flush batches" default:"100" yaml:"send_frequency_ms"`
	BatchSize        uint `long:"send_batch_size" description:"Maximum number of messages to put in a batch" default:"50" yaml:"send_batch_size"`
	Debug            bool `long:"debug" description:"Print debugging output" yaml:"debug,omitempty"`
	DebugOut         bool `long:"debug_stdout" description:"Instead of sending events to Honeycomb, print them to STDOUT for debugging" yaml:"debug_stdout,omitempty"`
	StatusInterval   uint `long:"status_interval" description:"How frequently, in seconds, to print out summary info" default:"60" yaml:"status_interval"`
	Backfill         bool `long:"backfill" description:"Configure honeytail to ingest old data in order to backfill Honeycomb. Sets the correct values for --backoff, --tail.read_from, and --tail.stop" yaml:"backfill,omitempty"`
	RebaseTime       bool `long:"rebase_time" description:"When backfilling data, rebase timestamps relative to the current time." yaml:"rebase_time,omitempty"`

	// Timestamp handling, per-field transformations, request shaping and the
	// advanced sampling strategies. GoalSampleRate is hidden;
	// addParserDefaultOptions fills it from SampleRate when dynamic sampling
	// is enabled. sanityCheckOptions rejects conflicting sampling settings.
	Localtime           bool     `long:"localtime" description:"When parsing a timestamp that has no time zone, assume it is in the same timezone as localhost instead of UTC (the default)" yaml:"localtime,omitempty"`
	Timezone            string   `long:"timezone" description:"When parsing a timestamp use this time zone instead of UTC (the default). Must be specified in TZ format as seen here: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones" yaml:"timezone,omitempty"`
	ScrubFields         []string `long:"scrub_field" description:"For the field listed, apply a one-way hash to the field content. May have multiple values." yaml:"scrub_field,omitempty"`
	DropFields          []string `long:"drop_field" description:"Do not send the field to Honeycomb. May have multiple values." yaml:"drop_field,omitempty"`
	AddFields           []string `long:"add_field" description:"Add the field to every event. Field should be key=val. May have multiple values." yaml:"add_field,omitempty"`
	DAMapFile           string   `long:"da_map_file" description:"Data Augmentation Map file. Path to a file that contains JSON mapping of columns to augment, the values of the column, and new objects to be inserted into the event, eg to add hostname based on IP address or username based on user ID." yaml:"da_map_file,omitempty"`
	RequestShape        []string `long:"request_shape" description:"Identify a field that contains an HTTP request of the form 'METHOD /path HTTP/1.x' or just the request path. Break apart that field into subfields that contain components. May have multiple values. Defaults to 'request' when using the nginx parser." yaml:"request_shape,omitempty"`
	ShapePrefix         string   `long:"shape_prefix" description:"Prefix to use on fields generated from request_shape to prevent field collision" yaml:"shape_prefix,omitempty"`
	RequestPattern      []string `long:"request_pattern" description:"A pattern for the request path on which to base the derived request_shape. May have multiple values. Patterns are considered in order; first match wins." yaml:"request_pattern,omitempty"`
	RequestParseQuery   string   `long:"request_parse_query" description:"How to parse the request query parameters. 'whitelist' means only extract listed query keys. 'all' means to extract all query parameters as individual columns" default:"whitelist" yaml:"request_parse_query"`
	RequestQueryKeys    []string `long:"request_query_keys" description:"Request query parameter key names to extract, when request_parse_query is 'whitelist'. May have multiple values." yaml:"request_query_keys,omitempty"`
	BackOff             bool     `long:"backoff" description:"When rate limited by the API, back off and retry sending failed events. Otherwise failed events are dropped. When --backfill is set, it will force this to true." yaml:"backoff,omitempty"`
	PrefixRegex         string   `long:"log_prefix" description:"pass a regex to this flag to strip the matching prefix from the line before handing to the parser. Useful when log aggregation prepends a line header. Use named groups to extract fields into the event." yaml:"log_prefix,omitempty"`
	DeterministicSample string   `long:"deterministic_sampling" description:"Specify a field to deterministically sample on, i.e., every concurrent Honeytail instance will sample 1/N based on content." yaml:"deterministic_sampling,omitempty"`
	DynSample           []string `long:"dynsampling" description:"Enable dynamic sampling using the field listed in this option. May have multiple values; fields will be concatenated to form the dynsample key. WARNING: increases CPU utilization dramatically over normal sampling." yaml:"dynsampling,omitempty"`
	DynWindowSec        int      `long:"dynsample_window" description:"Set measurement window size for the dynsampler, in seconds." default:"30" yaml:"dynsample_window"`
	PreSampledField     string   `long:"presampled" description:"If this log has already been sampled, specify the field containing the sample rate here and it will be passed along unchanged." yaml:"presampled,omitempty"`
	GoalSampleRate      int      `hidden:"true" description:"Used to hold the desired sample rate and set tailing sample rate to 1." yaml:"-"`
	MinSampleRate       int      `long:"dynsample_minimum" description:"If the rate of traffic falls below this, dynsampler won't sample." default:"1" yaml:"dynsample_minimum"`
	JSONFields          []string `long:"json_field" description:"JSON fields encoded as string to unescape and properly parse before sending. May have multiple values." yaml:"json_field,omitempty"`
	FilterFiles         []string `short:"F" long:"filter-file" description:"Log file(s) to exclude from --file glob. May have multiple values, including multiple globs." yaml:"filter-file,omitempty"`
	RenameFields        []string `long:"rename_field" description:"Format: 'before=after'. Rename field called 'before' from parsed lines to field name 'after' in Honeycomb events. May have multiple values." yaml:"rename_field,omitempty"`

	// LogLevel is only consulted by main when Debug is false.
	LogLevel string `long:"log_level" description:"Set the log level. Valid values are 'debug', 'info', 'warn', 'error', 'fatal', 'panic'." default:"info" yaml:"log_level,omitempty"`

	// Reqs holds the options sanityCheckOptions insists on; Modes holds the
	// "do one thing and exit" flags acted on by handleOtherModes.
	Reqs  RequiredOptions `group:"Required Options" yaml:"required_options,omitempty"`
	Modes OtherModes      `group:"Other Modes" yaml:"-"`

	// Options for the tail package, exposed under the "tail" namespace.
	Tail tail.TailOptions `group:"Tail Options" namespace:"tail" yaml:",omitempty"`

	// Per-parser option groups; each namespace matches an entry in
	// validParsers.
	ArangoDB   arangodb.Options   `group:"ArangoDB Parser Options" namespace:"arangodb" yaml:",omitempty"`
	CSV        csv.Options        `group:"CSV Parser Options" namespace:"csv" yaml:",omitempty"`
	JSON       htjson.Options     `group:"JSON Parser Options" namespace:"json" yaml:",omitempty"`
	KeyVal     keyval.Options     `group:"KeyVal Parser Options" namespace:"keyval" yaml:",omitempty"`
	Mongo      mongodb.Options    `group:"MongoDB Parser Options" namespace:"mongo" yaml:",omitempty"`
	MySQL      mysql.Options      `group:"MySQL Parser Options" namespace:"mysql" yaml:",omitempty"`
	Nginx      nginx.Options      `group:"Nginx Parser Options" namespace:"nginx" yaml:",omitempty"`
	PostgreSQL postgresql.Options `group:"PostgreSQL Parser Options" namespace:"postgresql" yaml:",omitempty"`
	Regex      regex.Options      `group:"Regex Parser Options" namespace:"regex" yaml:",omitempty"`
	Syslog     syslog.Options     `group:"Syslog Parser Options" namespace:"syslog" yaml:",omitempty"`
}
   113  
// RequiredOptions groups the flags honeytail needs in order to run.
// sanityCheckOptions exits with an error when the parser, log file list or
// dataset is missing, and when the write key is missing (or "NULL") unless
// --debug_stdout is set.
type RequiredOptions struct {
	ParserName string   `short:"p" long:"parser" description:"Parser module to use. Use --list to list available options."`
	WriteKey   string   `short:"k" long:"writekey" description:"Team write key"`
	LogFiles   []string `short:"f" long:"file" description:"Log file(s) to parse. Use '-' for STDIN, use this flag multiple times to tail multiple files, or use a glob (/path/to/foo-*.log)"`
	Dataset    string   `short:"d" long:"dataset" description:"Name of the dataset"`
}
   120  
// OtherModes groups the flags that make honeytail perform a single action
// (print help, version, a config dump, ...) and exit instead of tailing logs.
// handleOtherModes acts on them.
type OtherModes struct {
	Help               bool `short:"h" long:"help" description:"Show this help message."`
	ListParsers        bool `short:"l" long:"list" description:"List available parsers."`
	Version            bool `short:"V" long:"version" description:"Show version."`
	WriteDefaultConfig bool `long:"write_default_config" description:"Write a default config file to STDOUT." no-ini:"true"`
	WriteCurrentConfig bool `long:"write_current_config" description:"Write out the current config to STDOUT." no-ini:"true"`
	WriteCurrentYaml   bool `long:"write_current_yaml" description:"Write out the current config to STDOUT as YAML." no-ini:"true"`

	// Hidden from the help output.
	WriteManPage bool `hidden:"true" long:"write-man-page" description:"Write out a man page."`
}
   131  
   132  func main() {
   133  	var options GlobalOptions
   134  	flagParser := flag.NewParser(&options, flag.PrintErrors)
   135  	flagParser.Usage = `-p <parser> -k <writekey> -f </path/to/logfile> -d <mydata> [optional arguments]
   136  
   137  See https://honeycomb.io/docs/connect/agent/ for more detailed usage instructions.`
   138  
   139  	if extraArgs, err := flagParser.Parse(); err != nil || len(extraArgs) != 0 {
   140  		fmt.Println("Error: failed to parse the command line.")
   141  		if err != nil {
   142  			fmt.Printf("\t%s\n", err)
   143  		} else {
   144  			fmt.Printf("\tUnexpected extra arguments: %s\n", strings.Join(extraArgs, " "))
   145  		}
   146  		usage()
   147  		os.Exit(1)
   148  	}
   149  	// read the config file if present
   150  	if options.ConfigFile != "" {
   151  		ini := flag.NewIniParser(flagParser)
   152  		ini.ParseAsDefaults = true
   153  		if err := ini.ParseFile(options.ConfigFile); err != nil {
   154  			fmt.Printf("Error: failed to parse the config file %s\n", options.ConfigFile)
   155  			fmt.Printf("\t%s\n", err)
   156  			usage()
   157  			os.Exit(1)
   158  		}
   159  	}
   160  
   161  	if options.ConfigYaml != "" {
   162  		f, err := os.Open(options.ConfigYaml)
   163  		if err != nil {
   164  			fmt.Println("error opening " + options.ConfigYaml)
   165  			os.Exit(1)
   166  		}
   167  		b, err := io.ReadAll(f)
   168  		if err != nil {
   169  			fmt.Println("error reading " + options.ConfigYaml)
   170  			os.Exit(1)
   171  		}
   172  		yaml.Unmarshal(b, &options)
   173  	}
   174  
   175  	rand.Seed(time.Now().UnixNano())
   176  
   177  	if options.Debug {
   178  		logrus.SetLevel(logrus.DebugLevel)
   179  	} else if options.LogLevel != "" {
   180  		level, err := logrus.ParseLevel(options.LogLevel)
   181  		if err != nil {
   182  			fmt.Printf("invalid log level: %s\n", options.LogLevel)
   183  			os.Exit(1)
   184  		}
   185  		logrus.SetLevel(level)
   186  	}
   187  
   188  	// Support flag alias: --backfill should cover --backoff --tail.read_from=beginning --tail.stop
   189  	if options.Backfill {
   190  		options.BackOff = true
   191  		options.Tail.ReadFrom = "beginning"
   192  		options.Tail.Stop = true
   193  	}
   194  
   195  	// set time zone info
   196  	if options.Localtime {
   197  		httime.Location = time.Now().Location()
   198  	}
   199  	if options.Timezone != "" {
   200  		loc, err := time.LoadLocation(options.Timezone)
   201  		if err != nil {
   202  			fmt.Printf("time zone '%s' not successfully parsed.\n", options.Timezone)
   203  			fmt.Printf("see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones for a list of time zones\n")
   204  			fmt.Printf("expected format example: America/Los_Angeles\n")
   205  			fmt.Printf("Specific error: %s\n", err.Error())
   206  			os.Exit(1)
   207  		}
   208  		httime.Location = loc
   209  	}
   210  
   211  	setVersionUserAgent(options.Backfill, options.Reqs.ParserName)
   212  	handleOtherModes(flagParser, options)
   213  	addParserDefaultOptions(&options)
   214  	sanityCheckOptions(&options)
   215  
   216  	if !options.DebugOut {
   217  		if _, err := libhoney.VerifyAPIKey(libhoney.Config{
   218  			APIHost:  options.APIHost,
   219  			WriteKey: options.Reqs.WriteKey,
   220  		}); err != nil {
   221  			fmt.Fprintln(os.Stderr, "Could not verify Honeycomb write key: ", err)
   222  			os.Exit(1)
   223  		}
   224  	} else {
   225  		logrus.Debug("skipping Honeycomb write key verification, because --debug_stdout is set...")
   226  	}
   227  
   228  	logrus.Debug("parsed arguments: ", structToString(options))
   229  
   230  	run(context.Background(), options)
   231  }
   232  
   233  // convert options struct to a comma separated list of key=value pairs (for debugging)
   234  func structToString(s interface{}) string {
   235  	v := reflect.ValueOf(s)
   236  	t := v.Type()
   237  	fields := make([]string, v.NumField())
   238  
   239  	isExported := func(s string) bool {
   240  		f := s[0:1]
   241  		return f == strings.ToUpper(f)
   242  	}
   243  
   244  	for i := 0; i < v.NumField(); i++ {
   245  		if v.Field(i).Kind() == reflect.Struct {
   246  			fields[i] = structToString(v.Field(i).Interface())
   247  		} else {
   248  			name := t.Field(i).Name
   249  			if isExported(name) {
   250  				value := v.Field(i).Interface()
   251  				if name == "WriteKey" {
   252  					value = "[REDACTED]"
   253  				}
   254  				fields[i] = fmt.Sprintf("%s.%s=%v", t, name, value)
   255  			}
   256  		}
   257  	}
   258  	return strings.Join(fields, ",")
   259  }
   260  
   261  // setVersion sets the internal version ID and updates libhoney's user-agent
   262  func setVersionUserAgent(backfill bool, parserName string) {
   263  	if BuildID == "" {
   264  		version = "dev"
   265  	} else {
   266  		version = BuildID
   267  	}
   268  	if backfill {
   269  		parserName += " backfill"
   270  	}
   271  	libhoney.UserAgentAddition = fmt.Sprintf("honeytail/%s (%s)", version, parserName)
   272  }
   273  
   274  // handleOtherModes takes care of all flags that say we should just do something
   275  // and exit rather than actually parsing logs
   276  func handleOtherModes(fp *flag.Parser, options GlobalOptions) {
   277  	modes := options.Modes
   278  	if modes.Version {
   279  		fmt.Println("Honeytail version", version)
   280  		os.Exit(0)
   281  	}
   282  	if modes.Help {
   283  		fp.WriteHelp(os.Stdout)
   284  		fmt.Println("")
   285  		os.Exit(0)
   286  	}
   287  	if modes.WriteManPage {
   288  		fp.WriteManPage(os.Stdout)
   289  		os.Exit(0)
   290  	}
   291  	if modes.WriteDefaultConfig {
   292  		ip := flag.NewIniParser(fp)
   293  		ip.Write(os.Stdout, flag.IniIncludeDefaults|flag.IniCommentDefaults|flag.IniIncludeComments)
   294  		os.Exit(0)
   295  	}
   296  	if modes.WriteCurrentConfig {
   297  		ip := flag.NewIniParser(fp)
   298  		ip.Write(os.Stdout, flag.IniIncludeComments)
   299  		os.Exit(0)
   300  	}
   301  	if modes.WriteCurrentYaml {
   302  		y, err := yaml.Marshal(options)
   303  		if err != nil {
   304  			fmt.Println("unable to marshal options to YAML!")
   305  			os.Exit(1)
   306  		}
   307  		os.Stdout.Write(y)
   308  		os.Exit(0)
   309  	}
   310  
   311  	if modes.ListParsers {
   312  		fmt.Println("Available parsers:", strings.Join(validParsers, ", "))
   313  		os.Exit(0)
   314  	}
   315  }
   316  
   317  func addParserDefaultOptions(options *GlobalOptions) {
   318  	switch {
   319  	case options.Reqs.ParserName == "nginx":
   320  		// automatically normalize the request when using the nginx parser
   321  		options.RequestShape = append(options.RequestShape, "request")
   322  	}
   323  	if options.Reqs.ParserName != "mysql" {
   324  		// mysql is the only parser that requires in-parser sampling because it has
   325  		// a multi-line log format.
   326  		// Sample all other parser when tailing to conserve CPU
   327  		options.TailSample = true
   328  	} else {
   329  		options.TailSample = false
   330  	}
   331  	if options.DeterministicSample != "" {
   332  		options.TailSample = false
   333  	}
   334  	if len(options.DynSample) != 0 {
   335  		// when using dynamic sampling, we make the sampling decision after parsing
   336  		// the content, so we must not tailsample.
   337  		options.TailSample = false
   338  		options.GoalSampleRate = int(options.SampleRate)
   339  		options.SampleRate = 1
   340  	}
   341  }
   342  
   343  func sanityCheckOptions(options *GlobalOptions) {
   344  	switch {
   345  	case options.Reqs.ParserName == "":
   346  		fmt.Println("Parser required to be specified with the --parser flag.")
   347  		usage()
   348  		os.Exit(1)
   349  	case (options.Reqs.WriteKey == "" || options.Reqs.WriteKey == "NULL") && (!options.DebugOut):
   350  		fmt.Println("Write key required to be specified with the --writekey flag.")
   351  		usage()
   352  		os.Exit(1)
   353  	case len(options.Reqs.LogFiles) == 0:
   354  		fmt.Println("Log file name or '-' required to be specified with the --file flag.")
   355  		usage()
   356  		os.Exit(1)
   357  	case options.Reqs.Dataset == "":
   358  		fmt.Println("Dataset name required with the --dataset flag.")
   359  		usage()
   360  		os.Exit(1)
   361  	case options.SampleRate == 0:
   362  		fmt.Println("Sample rate must be an integer >= 1")
   363  		usage()
   364  		os.Exit(1)
   365  	case options.Tail.ReadFrom == "end" && options.Tail.Stop:
   366  		fmt.Println("Reading from the end and stopping when we get there. Zero lines to process. Ok, all done! ;)")
   367  		usage()
   368  		os.Exit(1)
   369  	case options.RequestParseQuery != "whitelist" && options.RequestParseQuery != "all":
   370  		fmt.Println("request_parse_query flag must be either 'whitelist' or 'all'.")
   371  		usage()
   372  		os.Exit(1)
   373  	case (len(options.DynSample) != 0 || options.DeterministicSample != "") && options.SampleRate <= 1 && options.GoalSampleRate <= 1:
   374  		fmt.Println("sample rate flag must be set >= 2 when dynamic or deterministic sampling is enabled")
   375  		usage()
   376  		os.Exit(1)
   377  	case len(options.DynSample) != 0 && options.DeterministicSample != "":
   378  		fmt.Println("dynamic sampling and deterministic sampling cannot be used together")
   379  		usage()
   380  		os.Exit(1)
   381  	case options.DeterministicSample != "" && options.PreSampledField != "":
   382  		fmt.Println("deterministic sampling and a presampled field cannot be used together")
   383  		usage()
   384  		os.Exit(1)
   385  	case options.SampleRate != 1 && options.PreSampledField != "":
   386  		fmt.Println("sampling and a presampled field cannot be used together")
   387  		usage()
   388  		os.Exit(1)
   389  	}
   390  
   391  	// check the prefix regex for validity
   392  	if options.PrefixRegex != "" {
   393  		// make sure the regex is anchored against the start of the string
   394  		if options.PrefixRegex[0] != '^' {
   395  			options.PrefixRegex = "^" + options.PrefixRegex
   396  		}
   397  		// make sure it's valid
   398  		_, err := regexp.Compile(options.PrefixRegex)
   399  		if err != nil {
   400  			fmt.Printf("Prefix regex %s doesn't compile: error %s\n", options.PrefixRegex, err)
   401  			usage()
   402  			os.Exit(1)
   403  		}
   404  	}
   405  
   406  	// Make sure input files exist
   407  	shouldExit := false
   408  	for _, f := range options.Reqs.LogFiles {
   409  		if f == "-" {
   410  			continue
   411  		}
   412  		if files, err := filepath.Glob(f); err != nil || files == nil {
   413  			fmt.Printf("Log file specified by --file=%s not found!\n", f)
   414  			shouldExit = true
   415  		}
   416  	}
   417  	if shouldExit {
   418  		usage()
   419  		os.Exit(1)
   420  	}
   421  }
   422  
   423  func usage() {
   424  	fmt.Print(`
   425  Usage: honeytail -p <parser> -k <writekey> -f </path/to/logfile> -d <mydata> [optional arguments]
   426  
   427  For even more detail on required and optional parameters, run
   428  honeytail --help
   429  `)
   430  }