bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/extrahop.go (about)

     1  package collectors
     2  
     3  import (
     4  	"fmt"
     5  	"net/url"
     6  	"regexp"
     7  	"strings"
     8  	"time"
     9  
    10  	"bosun.org/metadata"
    11  	"bosun.org/opentsdb"
    12  	"github.com/kylebrandt/gohop"
    13  )
    14  
    15  const extraHopIntervalSeconds int = 30
    16  
    17  var extraHopFilterProtoBy string       //What to filter the traffic by. Valid values are "namedprotocols", "toppercent" or "none"
    18  var extraHopTopProtoPerc int           //Only log the top % of protocols by volume
    19  var extraHopOtherProtoName string      //What name to log the "other" data under.
    20  var extraHopL7Description string       //What to append to the end of the L7 description metadata to explain what is and isn't filtered out
    21  var extraHopAdditionalMetrics []string //Other metrics to fetch from Extrahop
    22  var extraHopCertificateMatch *regexp.Regexp
    23  var extraHopCertificateActivityGroup int
    24  
    25  // ExtraHop collection registration
    26  func ExtraHop(host, apikey, filterby string, filterpercent int, customMetrics []string, certMatch string, certActivityGroup int) error {
    27  	if host == "" || apikey == "" {
    28  		return fmt.Errorf("Empty host or API key for ExtraHop.")
    29  	}
    30  
    31  	extraHopAdditionalMetrics = customMetrics
    32  	extraHopFilterProtoBy = filterby
    33  	switch filterby { //Set up options
    34  	case "toppercent":
    35  		extraHopL7Description = fmt.Sprintf("Only the top %d percent of traffic has its protocols logged, the remainder is tagged as as proto=otherprotos", extraHopTopProtoPerc)
    36  		extraHopOtherProtoName = "otherprotos"
    37  		if filterpercent > 0 && filterpercent < 100 {
    38  			extraHopTopProtoPerc = filterpercent
    39  		} else {
    40  			return fmt.Errorf("Invalid ExtraHop FilterPercent value (%d). Number should be between 1 and 99.", filterpercent)
    41  		}
    42  	case "namedprotocols":
    43  		extraHopL7Description = "Only named protocols are logged. Any unnamed protocol (A protocol name starting with tcp, udp or ssl) is tagged as proto=unnamed"
    44  		extraHopOtherProtoName = "unnamed"
    45  	//There is also case "none", but in that case the options we need to keep as default, so there's actually nothing to do here.
    46  	default:
    47  		return fmt.Errorf("Invalid ExtraHop FilterBy option (%s). Valid options are namedprotocols, toppercent or none.", filterby)
    48  
    49  	}
    50  	//Add the metadata for the L7 types, as now we have enough information to know what they're going to be
    51  	for l7type, l7s := range l7types {
    52  		xhMetricName := fmt.Sprintf("extrahop.l7.%s", l7type)
    53  		metadata.AddMeta(xhMetricName, nil, "rate", l7s.Rate, false)
    54  		metadata.AddMeta(xhMetricName, nil, "unit", l7s.Unit, false)
    55  		metadata.AddMeta(xhMetricName, nil, "desc", fmt.Sprintf("%s %s", l7s.Description, extraHopL7Description), false)
    56  	}
    57  	u, err := url.Parse(host)
    58  	if err != nil {
    59  		return err
    60  	}
    61  
    62  	if certMatch != "" {
    63  		compiledRegexp, err := regexp.Compile(certMatch)
    64  		if err != nil {
    65  			return err
    66  		}
    67  		extraHopCertificateMatch = compiledRegexp
    68  		extraHopCertificateActivityGroup = certActivityGroup
    69  	}
    70  	collectors = append(collectors, &IntervalCollector{
    71  		F: func() (opentsdb.MultiDataPoint, error) {
    72  			return c_extrahop(host, apikey)
    73  		},
    74  		name:     fmt.Sprintf("extrahop-%s", u.Host),
    75  		Interval: time.Second * time.Duration(extraHopIntervalSeconds),
    76  	})
    77  	return nil
    78  
    79  }
    80  
    81  func c_extrahop(host, apikey string) (opentsdb.MultiDataPoint, error) {
    82  	c := gohop.NewClient(host, apikey)
    83  	var md opentsdb.MultiDataPoint
    84  	if err := extraHopNetworks(c, &md); err != nil {
    85  		return nil, err
    86  	}
    87  	if err := extraHopGetAdditionalMetrics(c, &md); err != nil {
    88  		return nil, err
    89  	}
    90  	if err := extraHopGetCertificates(c, &md); err != nil {
    91  		return nil, err
    92  	}
    93  
    94  	return md, nil
    95  }
    96  
    97  func extraHopGetAdditionalMetrics(c *gohop.Client, md *opentsdb.MultiDataPoint) error {
    98  	for _, v := range extraHopAdditionalMetrics {
    99  		metric, err := gohop.StoEHMetric(v)
   100  		if err != nil {
   101  			return err
   102  		}
   103  		ms := []gohop.MetricSpec{ //Build a metric spec to tell ExtraHop what we want to pull out.
   104  			{Name: metric.MetricSpecName, CalcType: metric.MetricSpecCalcType, KeyPair: gohop.KeyPair{Key1Regex: "", Key2Regex: "", OpenTSDBKey1: "proto", Key2OpenTSDBKey2: ""}, OpenTSDBMetric: ehMetricNameEscape(v)},
   105  		}
   106  		mrk, err := c.KeyedMetricQuery(gohop.Cycle30Sec, metric.MetricCategory, metric.ObjectType, -60000, 0, ms, []int64{metric.ObjectId})
   107  		if err != nil {
   108  			return err
   109  		}
   110  
   111  		//This is our function that is going to be executed on each data point in the extrahop dataset
   112  		appendMetricPoint := func(c *gohop.Client, md *opentsdb.MultiDataPoint, a *gohop.MetricStatKeyed, b *[]gohop.MetricStatKeyedValue, d *gohop.MetricStatKeyedValue) {
   113  			switch d.Vtype {
   114  			case "tset":
   115  				for _, e := range d.Tset {
   116  					*md = append(*md, &opentsdb.DataPoint{
   117  						Metric:    ehMetricNameEscape(d.Key.Str),
   118  						Timestamp: a.Time,
   119  						Value:     e.Value,
   120  						Tags:      ehItemNameToTagSet(c, e.Key.Str),
   121  					})
   122  				}
   123  			}
   124  		}
   125  
   126  		processGohopStat(&mrk, c, md, appendMetricPoint) //This will loop through our datapoint structure and execute appendCountPoints on each final data piece
   127  	}
   128  
   129  	return nil
   130  }
   131  
   132  // extraHopNetworks grabs the complex metrics of the L7 traffic from ExtraHop. It is a complex type because the data is not just a simple time series,
   133  // the data needs to be tagged with vlan, protocol, etc. We can do the network and vlan tagging ourselves, but the protocol tagging comes
   134  // from ExtraHop itself.
   135  func extraHopNetworks(c *gohop.Client, md *opentsdb.MultiDataPoint) error {
   136  	nl, err := c.GetNetworkList(true) //Fetch the network list from ExtraHop, and include VLAN information
   137  	if err != nil {
   138  		return err
   139  	}
   140  	for _, net := range nl { //All found networks
   141  		for _, vlan := range net.Vlans { //All vlans inside this network
   142  			for l7type := range l7types { //All the types of data we want to retrieve for the vlan
   143  				xhMetricName := fmt.Sprintf("extrahop.l7.%s", l7type)
   144  				metricsDropped, metricsKept := 0, 0  //Counters for debugging purposes
   145  				otherValues := make(map[int64]int64) //Container to put any extra time series data that we need to add, for consolidating unnamed or dropped protocols, etc.
   146  				ms := []gohop.MetricSpec{            //Build a metric spec to tell ExtraHop what we want to grab from ExtraHop
   147  					{Name: l7type, KeyPair: gohop.KeyPair{Key1Regex: "", Key2Regex: "", OpenTSDBKey1: "proto", Key2OpenTSDBKey2: ""}, OpenTSDBMetric: xhMetricName}, //ExtraHop breaks this by L7 protocol on its own, but we need to tell TSDB what tag to add, which is in this case "proto"
   148  				}
   149  				mrk, err := c.KeyedMetricQuery(gohop.Cycle30Sec, "app", "vlan", int64(extraHopIntervalSeconds)*-1000, 0, ms, []int64{vlan.VlanId}) //Get the data from ExtraHop
   150  				if err != nil {
   151  					return err
   152  				}
   153  				md2, err := mrk.OpenTSDBDataPoints(ms, "vlan", map[int64]string{vlan.VlanId: fmt.Sprintf("%d", vlan.VlanId)}) //Get the OpenTSDBDataPoints from the ExtraHop data
   154  				if err != nil {
   155  					return err
   156  				}
   157  				valueCutoff := calculateDataCutoff(mrk) //Calculate what the cutoff value will be (used later on when we decide whether or not to consolidate the data)
   158  				for _, dp := range md2 {                //We need to manually process the TSDB datapoints that we've got
   159  					dp.Tags["host"] = c.APIHost
   160  					dp.Tags["network"] = net.Name
   161  					switch extraHopFilterProtoBy { //These are our filter options from the the configuration file. Filter by %, named, or none
   162  					case "toppercent": //Only include protocols that make up a certain % of the traffic
   163  						if dp.Value.(int64) >= valueCutoff[dp.Timestamp] { //It's in the top percent so log it as-is
   164  							*md = append(*md, dp)
   165  							metricsKept++
   166  						} else {
   167  							otherValues[dp.Timestamp] += dp.Value.(int64)
   168  							metricsDropped++
   169  						}
   170  					case "namedprotocols": //Only include protocols that have an actual name (SSL443 excepted)
   171  						if strings.Index(dp.Tags["proto"], "tcp") != 0 && strings.Index(dp.Tags["proto"], "udp") != 0 && (strings.Index(dp.Tags["proto"], "SSL") != 0 || dp.Tags["proto"] == "SSL443") { //The first characters are not tcp or udp.
   172  							*md = append(*md, dp)
   173  							metricsKept++
   174  						} else {
   175  							otherValues[dp.Timestamp] += dp.Value.(int64)
   176  							metricsDropped++
   177  						}
   178  					case "none": //Log everything. Is OK for viewing short timespans, but calculating, 2,000+ protocols over a multi-day window is bad for Bosun's performance
   179  						*md = append(*md, dp)
   180  						metricsKept++
   181  					}
   182  
   183  				}
   184  				//Take the consolidated values and add them now too
   185  				for k, v := range otherValues {
   186  					*md = append(*md, &opentsdb.DataPoint{
   187  						Metric:    xhMetricName,
   188  						Timestamp: k,
   189  						Tags:      opentsdb.TagSet{"vlan": fmt.Sprintf("%d", vlan.VlanId), "proto": extraHopOtherProtoName, "host": c.APIHost, "network": net.Name},
   190  						Value:     v,
   191  					})
   192  				}
   193  			}
   194  		}
   195  	}
   196  	return nil
   197  }
   198  
   199  func extraHopGetCertificates(c *gohop.Client, md *opentsdb.MultiDataPoint) error {
   200  	if extraHopCertificateMatch == nil {
   201  		return nil
   202  	}
   203  
   204  	if err := extraHopGetCertificateByCount(c, md); err != nil {
   205  		return err
   206  	}
   207  
   208  	if err := extraHopGetCertificateByExpiry(c, md); err != nil {
   209  		return err
   210  	}
   211  
   212  	return nil
   213  }
   214  
   215  func extraHopGetCertificateByCount(c *gohop.Client, md *opentsdb.MultiDataPoint) error {
   216  	//These are the metrics we are populating in this part of the collector
   217  	metricNameCount := "extrahop.certificates"
   218  
   219  	//Metadata for the above metrics
   220  	metadata.AddMeta(metricNameCount, nil, "rate", metadata.Gauge, false)
   221  	metadata.AddMeta(metricNameCount, nil, "unit", metadata.Count, false)
   222  	metadata.AddMeta(metricNameCount, nil, "desc", "The number of times a given certificate was seen", false)
   223  
   224  	ms := []gohop.MetricSpec{ //Build a metric spec to tell ExtraHop what we want to pull out.
   225  		{Name: "cert_subject", KeyPair: gohop.KeyPair{Key1Regex: "", Key2Regex: "", OpenTSDBKey1: "", Key2OpenTSDBKey2: ""}, OpenTSDBMetric: metricNameCount},
   226  	}
   227  	mrk, err := c.KeyedMetricQuery(gohop.Cycle30Sec, "ssl_server_detail", "activity_group", -60000, 0, ms, []int64{int64(extraHopCertificateActivityGroup)})
   228  	if err != nil {
   229  		return err
   230  	}
   231  
   232  	//At this time we have a keyed metric response from ExtraHop. We need to find all the stats, then the values of the stats, and then
   233  	//filter out to only the records we want.
   234  
   235  	//This is our function that is going to be executed on each data point in the extrahop dataset
   236  	appendCountPoints := func(c *gohop.Client, md *opentsdb.MultiDataPoint, a *gohop.MetricStatKeyed, b *[]gohop.MetricStatKeyedValue, d *gohop.MetricStatKeyedValue) {
   237  		thisPoint := getSSLDataPointFromSet(metricNameCount, c.APIUrl.Host, a.Time, d)
   238  		if thisPoint != nil {
   239  			*md = append(*md, thisPoint)
   240  		}
   241  	}
   242  
   243  	processGohopStat(&mrk, c, md, appendCountPoints) //This will loop through our datapoint structure and execute appendCountPoints on each final data piece
   244  
   245  	return nil
   246  }
   247  func extraHopGetCertificateByExpiry(c *gohop.Client, md *opentsdb.MultiDataPoint) error {
   248  	//These are the metrics we are populating in this part of the collector
   249  	metricNameExpiry := "extrahop.certificates.expiry"
   250  	metricNameTillExpiry := "extrahop.certificates.tillexpiry"
   251  
   252  	//Metadata for the above metrics
   253  	metadata.AddMeta(metricNameExpiry, nil, "rate", metadata.Gauge, false)
   254  	metadata.AddMeta(metricNameExpiry, nil, "unit", metadata.Timestamp, false)
   255  	metadata.AddMeta(metricNameExpiry, nil, "desc", "Timestamp of when the certificate expires", false)
   256  
   257  	metadata.AddMeta(metricNameTillExpiry, nil, "rate", metadata.Gauge, false)
   258  	metadata.AddMeta(metricNameTillExpiry, nil, "unit", metadata.Second, false)
   259  	metadata.AddMeta(metricNameTillExpiry, nil, "desc", "Number of seconds until the certificate expires", false)
   260  
   261  	ms := []gohop.MetricSpec{ //Build a metric spec to tell ExtraHop what we want to pull out.
   262  		{Name: "cert_expiration", KeyPair: gohop.KeyPair{Key1Regex: "", Key2Regex: "", OpenTSDBKey1: "", Key2OpenTSDBKey2: ""}, OpenTSDBMetric: metricNameExpiry},
   263  	}
   264  	mrk, err := c.KeyedMetricQuery(gohop.Cycle30Sec, "ssl_server_detail", "activity_group", -60000, 0, ms, []int64{int64(extraHopCertificateActivityGroup)})
   265  	if err != nil {
   266  		return err
   267  	}
   268  
   269  	//At this time we have a keyed metric response from ExtraHop. We need to find all the stats, then the values of the stats, and then
   270  	//filter out to only the records we want.
   271  
   272  	//This is our function that is going to be executed on each data point in the extrahop dataset
   273  	appendExpiryPoints := func(c *gohop.Client, md *opentsdb.MultiDataPoint, a *gohop.MetricStatKeyed, b *[]gohop.MetricStatKeyedValue, d *gohop.MetricStatKeyedValue) {
   274  		thisPointExpiry := getSSLDataPointFromSet(metricNameExpiry, c.APIUrl.Host, a.Time, d)
   275  		if thisPointExpiry != nil {
   276  			*md = append(*md, thisPointExpiry)
   277  		}
   278  
   279  		thisPointTillExpiry := getSSLDataPointFromSet(metricNameTillExpiry, c.APIUrl.Host, a.Time, d)
   280  		if thisPointTillExpiry != nil {
   281  			thisPointTillExpiry.Value = thisPointTillExpiry.Value.(int64) - (a.Time / 1000)
   282  			*md = append(*md, thisPointTillExpiry)
   283  		}
   284  	}
   285  
   286  	processGohopStat(&mrk, c, md, appendExpiryPoints) //This will loop through our datapoint structure and execute appendExpiryPoints on each final data piece
   287  	return nil
   288  }
   289  
   290  type processFunc func(*gohop.Client, *opentsdb.MultiDataPoint, *gohop.MetricStatKeyed, *[]gohop.MetricStatKeyedValue, *gohop.MetricStatKeyedValue)
   291  
   292  func processGohopStat(mrk *gohop.MetricResponseKeyed, c *gohop.Client, md *opentsdb.MultiDataPoint, pc processFunc) {
   293  	for _, a := range mrk.Stats {
   294  		for _, b := range a.Values {
   295  			for _, d := range b {
   296  				pc(c, md, &a, &b, &d)
   297  			}
   298  		}
   299  	}
   300  }
   301  
   302  func getSSLDataPointFromSet(metricName, APIUrlHost string, timestamp int64, d *gohop.MetricStatKeyedValue) *opentsdb.DataPoint {
   303  	//The metric key comes as subject:crypt_strength, e.g. *.example.com:RSA_2048
   304  	if strings.IndexAny(d.Key.Str, ":") == -1 { //If the certificate key doesn't contain a : then ignore
   305  		return nil
   306  	}
   307  	certParts := strings.Split(d.Key.Str, ":") //Get the subject and the crypt_strength into seperate parts
   308  	if len(certParts) != 2 {                   //If we don't get exactly 2 parts when we split on the :, then ignore
   309  		return nil
   310  	}
   311  	certSubject := strings.ToLower(certParts[0])            //Make the subject consistently lowercase
   312  	certStrength := certParts[1]                            //Get the crypt_strength
   313  	if !extraHopCertificateMatch.MatchString(certSubject) { //If this certificate does not match the subject name we're filtering on, then ignore
   314  		return nil
   315  	}
   316  	certSubject = strings.Replace(certSubject, "*.", "wild_", -1)                                                     //* is an important part of the subject, but an invalid tag. This should make it pretty obvious that we mean a wildcard cert, not a subdomain of "wild"
   317  	certTags := opentsdb.TagSet{"host": strings.ToLower(APIUrlHost), "subject": certSubject, "keysize": certStrength} //Tags for the metrics
   318  	//Add a key that is the raw expiry time
   319  	return &opentsdb.DataPoint{
   320  		Metric:    metricName,
   321  		Timestamp: timestamp,
   322  		Value:     d.Value,
   323  		Tags:      certTags,
   324  	}
   325  }
   326  
   327  //These are used when looping through which L7 traffic to get. We want byte counts and packet counts, and this is the metadata that goes with them.
   328  var l7types = map[string]L7Stats{
   329  	"bytes": {Rate: metadata.Gauge, Unit: metadata.Bytes, Description: "The number of bytes transmitted on this network.You can drill down by server, network, vlan and protocol for further investigations."},
   330  	"pkts":  {Rate: metadata.Gauge, Unit: metadata.Counter, Description: "The number of packets transmitted on this network. You can drill down by server, network, vlan and protocol for further investigations."},
   331  }
   332  
   333  type L7Stats struct {
   334  	Rate        metadata.RateType
   335  	Unit        metadata.Unit
   336  	Description string
   337  }
   338  
   339  //Given the % value in the configuration file, calculate what the actual minimum value is for each of the time points returned by ExtraHop
   340  func calculateDataCutoff(k gohop.MetricResponseKeyed) map[int64]int64 {
   341  	sums := make(map[int64]int64)
   342  	rets := make(map[int64]int64)
   343  	for _, dp := range k.Stats {
   344  		for _, dv := range dp.Values {
   345  			for _, dw := range dv {
   346  				sums[dp.Time/1000] += dw.Value
   347  			}
   348  
   349  		}
   350  	}
   351  	for k, v := range sums {
   352  		rets[k] = int64(float64(v) * (1 - float64(extraHopTopProtoPerc)/100))
   353  	}
   354  	return rets
   355  }
   356  
   357  func ehItemNameToTagSet(c *gohop.Client, ehName string) opentsdb.TagSet {
   358  	thisTagSet := opentsdb.TagSet{"host": strings.ToLower(c.APIUrl.Host)}
   359  	if strings.IndexAny(ehName, ",") == 0 {
   360  		return thisTagSet
   361  	}
   362  	nameParts := strings.Split(ehName, ",")
   363  	for _, p := range nameParts {
   364  		tagParts := strings.Split(p, "=")
   365  		if len(tagParts) > 0 {
   366  			thisTagSet[tagParts[0]] = tagParts[1]
   367  		}
   368  	}
   369  	return thisTagSet
   370  }
   371  
   372  func ehMetricNameEscape(metricName string) string {
   373  	metricName = strings.ToLower(metricName)
   374  	metricName = strings.Replace(metricName, " ", "_", -1)
   375  	return fmt.Sprintf("extrahop.application.%v", metricName)
   376  }