bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/extrahop.go (about) 1 package collectors 2 3 import ( 4 "fmt" 5 "net/url" 6 "regexp" 7 "strings" 8 "time" 9 10 "bosun.org/metadata" 11 "bosun.org/opentsdb" 12 "github.com/kylebrandt/gohop" 13 ) 14 15 const extraHopIntervalSeconds int = 30 16 17 var extraHopFilterProtoBy string //What to filter the traffic by. Valid values are "namedprotocols", "toppercent" or "none" 18 var extraHopTopProtoPerc int //Only log the top % of protocols by volume 19 var extraHopOtherProtoName string //What name to log the "other" data under. 20 var extraHopL7Description string //What to append to the end of the L7 description metadata to explain what is and isn't filtered out 21 var extraHopAdditionalMetrics []string //Other metrics to fetch from Extrahop 22 var extraHopCertificateMatch *regexp.Regexp 23 var extraHopCertificateActivityGroup int 24 25 // ExtraHop collection registration 26 func ExtraHop(host, apikey, filterby string, filterpercent int, customMetrics []string, certMatch string, certActivityGroup int) error { 27 if host == "" || apikey == "" { 28 return fmt.Errorf("Empty host or API key for ExtraHop.") 29 } 30 31 extraHopAdditionalMetrics = customMetrics 32 extraHopFilterProtoBy = filterby 33 switch filterby { //Set up options 34 case "toppercent": 35 extraHopL7Description = fmt.Sprintf("Only the top %d percent of traffic has its protocols logged, the remainder is tagged as as proto=otherprotos", extraHopTopProtoPerc) 36 extraHopOtherProtoName = "otherprotos" 37 if filterpercent > 0 && filterpercent < 100 { 38 extraHopTopProtoPerc = filterpercent 39 } else { 40 return fmt.Errorf("Invalid ExtraHop FilterPercent value (%d). Number should be between 1 and 99.", filterpercent) 41 } 42 case "namedprotocols": 43 extraHopL7Description = "Only named protocols are logged. Any unnamed protocol (A protocol name starting with tcp, udp or ssl) is tagged as proto=unnamed" 44 extraHopOtherProtoName = "unnamed" 45 //There is also case "none", but in that case the options we need to keep as default, so there's actually nothing to do here. 46 default: 47 return fmt.Errorf("Invalid ExtraHop FilterBy option (%s). Valid options are namedprotocols, toppercent or none.", filterby) 48 49 } 50 //Add the metadata for the L7 types, as now we have enough information to know what they're going to be 51 for l7type, l7s := range l7types { 52 xhMetricName := fmt.Sprintf("extrahop.l7.%s", l7type) 53 metadata.AddMeta(xhMetricName, nil, "rate", l7s.Rate, false) 54 metadata.AddMeta(xhMetricName, nil, "unit", l7s.Unit, false) 55 metadata.AddMeta(xhMetricName, nil, "desc", fmt.Sprintf("%s %s", l7s.Description, extraHopL7Description), false) 56 } 57 u, err := url.Parse(host) 58 if err != nil { 59 return err 60 } 61 62 if certMatch != "" { 63 compiledRegexp, err := regexp.Compile(certMatch) 64 if err != nil { 65 return err 66 } 67 extraHopCertificateMatch = compiledRegexp 68 extraHopCertificateActivityGroup = certActivityGroup 69 } 70 collectors = append(collectors, &IntervalCollector{ 71 F: func() (opentsdb.MultiDataPoint, error) { 72 return c_extrahop(host, apikey) 73 }, 74 name: fmt.Sprintf("extrahop-%s", u.Host), 75 Interval: time.Second * time.Duration(extraHopIntervalSeconds), 76 }) 77 return nil 78 79 } 80 81 func c_extrahop(host, apikey string) (opentsdb.MultiDataPoint, error) { 82 c := gohop.NewClient(host, apikey) 83 var md opentsdb.MultiDataPoint 84 if err := extraHopNetworks(c, &md); err != nil { 85 return nil, err 86 } 87 if err := extraHopGetAdditionalMetrics(c, &md); err != nil { 88 return nil, err 89 } 90 if err := extraHopGetCertificates(c, &md); err != nil { 91 return nil, err 92 } 93 94 return md, nil 95 } 96 97 func extraHopGetAdditionalMetrics(c *gohop.Client, md *opentsdb.MultiDataPoint) error { 98 for _, v := range extraHopAdditionalMetrics { 99 metric, err := gohop.StoEHMetric(v) 100 if err != nil { 101 return err 102 } 103 ms := []gohop.MetricSpec{ //Build a metric spec to tell ExtraHop what we want to pull out. 104 {Name: metric.MetricSpecName, CalcType: metric.MetricSpecCalcType, KeyPair: gohop.KeyPair{Key1Regex: "", Key2Regex: "", OpenTSDBKey1: "proto", Key2OpenTSDBKey2: ""}, OpenTSDBMetric: ehMetricNameEscape(v)}, 105 } 106 mrk, err := c.KeyedMetricQuery(gohop.Cycle30Sec, metric.MetricCategory, metric.ObjectType, -60000, 0, ms, []int64{metric.ObjectId}) 107 if err != nil { 108 return err 109 } 110 111 //This is our function that is going to be executed on each data point in the extrahop dataset 112 appendMetricPoint := func(c *gohop.Client, md *opentsdb.MultiDataPoint, a *gohop.MetricStatKeyed, b *[]gohop.MetricStatKeyedValue, d *gohop.MetricStatKeyedValue) { 113 switch d.Vtype { 114 case "tset": 115 for _, e := range d.Tset { 116 *md = append(*md, &opentsdb.DataPoint{ 117 Metric: ehMetricNameEscape(d.Key.Str), 118 Timestamp: a.Time, 119 Value: e.Value, 120 Tags: ehItemNameToTagSet(c, e.Key.Str), 121 }) 122 } 123 } 124 } 125 126 processGohopStat(&mrk, c, md, appendMetricPoint) //This will loop through our datapoint structure and execute appendCountPoints on each final data piece 127 } 128 129 return nil 130 } 131 132 // extraHopNetworks grabs the complex metrics of the L7 traffic from ExtraHop. It is a complex type because the data is not just a simple time series, 133 // the data needs to be tagged with vlan, protocol, etc. We can do the network and vlan tagging ourselves, but the protocol tagging comes 134 // from ExtraHop itself. 135 func extraHopNetworks(c *gohop.Client, md *opentsdb.MultiDataPoint) error { 136 nl, err := c.GetNetworkList(true) //Fetch the network list from ExtraHop, and include VLAN information 137 if err != nil { 138 return err 139 } 140 for _, net := range nl { //All found networks 141 for _, vlan := range net.Vlans { //All vlans inside this network 142 for l7type := range l7types { //All the types of data we want to retrieve for the vlan 143 xhMetricName := fmt.Sprintf("extrahop.l7.%s", l7type) 144 metricsDropped, metricsKept := 0, 0 //Counters for debugging purposes 145 otherValues := make(map[int64]int64) //Container to put any extra time series data that we need to add, for consolidating unnamed or dropped protocols, etc. 146 ms := []gohop.MetricSpec{ //Build a metric spec to tell ExtraHop what we want to grab from ExtraHop 147 {Name: l7type, KeyPair: gohop.KeyPair{Key1Regex: "", Key2Regex: "", OpenTSDBKey1: "proto", Key2OpenTSDBKey2: ""}, OpenTSDBMetric: xhMetricName}, //ExtraHop breaks this by L7 protocol on its own, but we need to tell TSDB what tag to add, which is in this case "proto" 148 } 149 mrk, err := c.KeyedMetricQuery(gohop.Cycle30Sec, "app", "vlan", int64(extraHopIntervalSeconds)*-1000, 0, ms, []int64{vlan.VlanId}) //Get the data from ExtraHop 150 if err != nil { 151 return err 152 } 153 md2, err := mrk.OpenTSDBDataPoints(ms, "vlan", map[int64]string{vlan.VlanId: fmt.Sprintf("%d", vlan.VlanId)}) //Get the OpenTSDBDataPoints from the ExtraHop data 154 if err != nil { 155 return err 156 } 157 valueCutoff := calculateDataCutoff(mrk) //Calculate what the cutoff value will be (used later on when we decide whether or not to consolidate the data) 158 for _, dp := range md2 { //We need to manually process the TSDB datapoints that we've got 159 dp.Tags["host"] = c.APIHost 160 dp.Tags["network"] = net.Name 161 switch extraHopFilterProtoBy { //These are our filter options from the the configuration file. Filter by %, named, or none 162 case "toppercent": //Only include protocols that make up a certain % of the traffic 163 if dp.Value.(int64) >= valueCutoff[dp.Timestamp] { //It's in the top percent so log it as-is 164 *md = append(*md, dp) 165 metricsKept++ 166 } else { 167 otherValues[dp.Timestamp] += dp.Value.(int64) 168 metricsDropped++ 169 } 170 case "namedprotocols": //Only include protocols that have an actual name (SSL443 excepted) 171 if strings.Index(dp.Tags["proto"], "tcp") != 0 && strings.Index(dp.Tags["proto"], "udp") != 0 && (strings.Index(dp.Tags["proto"], "SSL") != 0 || dp.Tags["proto"] == "SSL443") { //The first characters are not tcp or udp. 172 *md = append(*md, dp) 173 metricsKept++ 174 } else { 175 otherValues[dp.Timestamp] += dp.Value.(int64) 176 metricsDropped++ 177 } 178 case "none": //Log everything. Is OK for viewing short timespans, but calculating, 2,000+ protocols over a multi-day window is bad for Bosun's performance 179 *md = append(*md, dp) 180 metricsKept++ 181 } 182 183 } 184 //Take the consolidated values and add them now too 185 for k, v := range otherValues { 186 *md = append(*md, &opentsdb.DataPoint{ 187 Metric: xhMetricName, 188 Timestamp: k, 189 Tags: opentsdb.TagSet{"vlan": fmt.Sprintf("%d", vlan.VlanId), "proto": extraHopOtherProtoName, "host": c.APIHost, "network": net.Name}, 190 Value: v, 191 }) 192 } 193 } 194 } 195 } 196 return nil 197 } 198 199 func extraHopGetCertificates(c *gohop.Client, md *opentsdb.MultiDataPoint) error { 200 if extraHopCertificateMatch == nil { 201 return nil 202 } 203 204 if err := extraHopGetCertificateByCount(c, md); err != nil { 205 return err 206 } 207 208 if err := extraHopGetCertificateByExpiry(c, md); err != nil { 209 return err 210 } 211 212 return nil 213 } 214 215 func extraHopGetCertificateByCount(c *gohop.Client, md *opentsdb.MultiDataPoint) error { 216 //These are the metrics we are populating in this part of the collector 217 metricNameCount := "extrahop.certificates" 218 219 //Metadata for the above metrics 220 metadata.AddMeta(metricNameCount, nil, "rate", metadata.Gauge, false) 221 metadata.AddMeta(metricNameCount, nil, "unit", metadata.Count, false) 222 metadata.AddMeta(metricNameCount, nil, "desc", "The number of times a given certificate was seen", false) 223 224 ms := []gohop.MetricSpec{ //Build a metric spec to tell ExtraHop what we want to pull out. 225 {Name: "cert_subject", KeyPair: gohop.KeyPair{Key1Regex: "", Key2Regex: "", OpenTSDBKey1: "", Key2OpenTSDBKey2: ""}, OpenTSDBMetric: metricNameCount}, 226 } 227 mrk, err := c.KeyedMetricQuery(gohop.Cycle30Sec, "ssl_server_detail", "activity_group", -60000, 0, ms, []int64{int64(extraHopCertificateActivityGroup)}) 228 if err != nil { 229 return err 230 } 231 232 //At this time we have a keyed metric response from ExtraHop. We need to find all the stats, then the values of the stats, and then 233 //filter out to only the records we want. 234 235 //This is our function that is going to be executed on each data point in the extrahop dataset 236 appendCountPoints := func(c *gohop.Client, md *opentsdb.MultiDataPoint, a *gohop.MetricStatKeyed, b *[]gohop.MetricStatKeyedValue, d *gohop.MetricStatKeyedValue) { 237 thisPoint := getSSLDataPointFromSet(metricNameCount, c.APIUrl.Host, a.Time, d) 238 if thisPoint != nil { 239 *md = append(*md, thisPoint) 240 } 241 } 242 243 processGohopStat(&mrk, c, md, appendCountPoints) //This will loop through our datapoint structure and execute appendCountPoints on each final data piece 244 245 return nil 246 } 247 func extraHopGetCertificateByExpiry(c *gohop.Client, md *opentsdb.MultiDataPoint) error { 248 //These are the metrics we are populating in this part of the collector 249 metricNameExpiry := "extrahop.certificates.expiry" 250 metricNameTillExpiry := "extrahop.certificates.tillexpiry" 251 252 //Metadata for the above metrics 253 metadata.AddMeta(metricNameExpiry, nil, "rate", metadata.Gauge, false) 254 metadata.AddMeta(metricNameExpiry, nil, "unit", metadata.Timestamp, false) 255 metadata.AddMeta(metricNameExpiry, nil, "desc", "Timestamp of when the certificate expires", false) 256 257 metadata.AddMeta(metricNameTillExpiry, nil, "rate", metadata.Gauge, false) 258 metadata.AddMeta(metricNameTillExpiry, nil, "unit", metadata.Second, false) 259 metadata.AddMeta(metricNameTillExpiry, nil, "desc", "Number of seconds until the certificate expires", false) 260 261 ms := []gohop.MetricSpec{ //Build a metric spec to tell ExtraHop what we want to pull out. 262 {Name: "cert_expiration", KeyPair: gohop.KeyPair{Key1Regex: "", Key2Regex: "", OpenTSDBKey1: "", Key2OpenTSDBKey2: ""}, OpenTSDBMetric: metricNameExpiry}, 263 } 264 mrk, err := c.KeyedMetricQuery(gohop.Cycle30Sec, "ssl_server_detail", "activity_group", -60000, 0, ms, []int64{int64(extraHopCertificateActivityGroup)}) 265 if err != nil { 266 return err 267 } 268 269 //At this time we have a keyed metric response from ExtraHop. We need to find all the stats, then the values of the stats, and then 270 //filter out to only the records we want. 271 272 //This is our function that is going to be executed on each data point in the extrahop dataset 273 appendExpiryPoints := func(c *gohop.Client, md *opentsdb.MultiDataPoint, a *gohop.MetricStatKeyed, b *[]gohop.MetricStatKeyedValue, d *gohop.MetricStatKeyedValue) { 274 thisPointExpiry := getSSLDataPointFromSet(metricNameExpiry, c.APIUrl.Host, a.Time, d) 275 if thisPointExpiry != nil { 276 *md = append(*md, thisPointExpiry) 277 } 278 279 thisPointTillExpiry := getSSLDataPointFromSet(metricNameTillExpiry, c.APIUrl.Host, a.Time, d) 280 if thisPointTillExpiry != nil { 281 thisPointTillExpiry.Value = thisPointTillExpiry.Value.(int64) - (a.Time / 1000) 282 *md = append(*md, thisPointTillExpiry) 283 } 284 } 285 286 processGohopStat(&mrk, c, md, appendExpiryPoints) //This will loop through our datapoint structure and execute appendExpiryPoints on each final data piece 287 return nil 288 } 289 290 type processFunc func(*gohop.Client, *opentsdb.MultiDataPoint, *gohop.MetricStatKeyed, *[]gohop.MetricStatKeyedValue, *gohop.MetricStatKeyedValue) 291 292 func processGohopStat(mrk *gohop.MetricResponseKeyed, c *gohop.Client, md *opentsdb.MultiDataPoint, pc processFunc) { 293 for _, a := range mrk.Stats { 294 for _, b := range a.Values { 295 for _, d := range b { 296 pc(c, md, &a, &b, &d) 297 } 298 } 299 } 300 } 301 302 func getSSLDataPointFromSet(metricName, APIUrlHost string, timestamp int64, d *gohop.MetricStatKeyedValue) *opentsdb.DataPoint { 303 //The metric key comes as subject:crypt_strength, e.g. *.example.com:RSA_2048 304 if strings.IndexAny(d.Key.Str, ":") == -1 { //If the certificate key doesn't contain a : then ignore 305 return nil 306 } 307 certParts := strings.Split(d.Key.Str, ":") //Get the subject and the crypt_strength into seperate parts 308 if len(certParts) != 2 { //If we don't get exactly 2 parts when we split on the :, then ignore 309 return nil 310 } 311 certSubject := strings.ToLower(certParts[0]) //Make the subject consistently lowercase 312 certStrength := certParts[1] //Get the crypt_strength 313 if !extraHopCertificateMatch.MatchString(certSubject) { //If this certificate does not match the subject name we're filtering on, then ignore 314 return nil 315 } 316 certSubject = strings.Replace(certSubject, "*.", "wild_", -1) //* is an important part of the subject, but an invalid tag. This should make it pretty obvious that we mean a wildcard cert, not a subdomain of "wild" 317 certTags := opentsdb.TagSet{"host": strings.ToLower(APIUrlHost), "subject": certSubject, "keysize": certStrength} //Tags for the metrics 318 //Add a key that is the raw expiry time 319 return &opentsdb.DataPoint{ 320 Metric: metricName, 321 Timestamp: timestamp, 322 Value: d.Value, 323 Tags: certTags, 324 } 325 } 326 327 //These are used when looping through which L7 traffic to get. We want byte counts and packet counts, and this is the metadata that goes with them. 328 var l7types = map[string]L7Stats{ 329 "bytes": {Rate: metadata.Gauge, Unit: metadata.Bytes, Description: "The number of bytes transmitted on this network.You can drill down by server, network, vlan and protocol for further investigations."}, 330 "pkts": {Rate: metadata.Gauge, Unit: metadata.Counter, Description: "The number of packets transmitted on this network. You can drill down by server, network, vlan and protocol for further investigations."}, 331 } 332 333 type L7Stats struct { 334 Rate metadata.RateType 335 Unit metadata.Unit 336 Description string 337 } 338 339 //Given the % value in the configuration file, calculate what the actual minimum value is for each of the time points returned by ExtraHop 340 func calculateDataCutoff(k gohop.MetricResponseKeyed) map[int64]int64 { 341 sums := make(map[int64]int64) 342 rets := make(map[int64]int64) 343 for _, dp := range k.Stats { 344 for _, dv := range dp.Values { 345 for _, dw := range dv { 346 sums[dp.Time/1000] += dw.Value 347 } 348 349 } 350 } 351 for k, v := range sums { 352 rets[k] = int64(float64(v) * (1 - float64(extraHopTopProtoPerc)/100)) 353 } 354 return rets 355 } 356 357 func ehItemNameToTagSet(c *gohop.Client, ehName string) opentsdb.TagSet { 358 thisTagSet := opentsdb.TagSet{"host": strings.ToLower(c.APIUrl.Host)} 359 if strings.IndexAny(ehName, ",") == 0 { 360 return thisTagSet 361 } 362 nameParts := strings.Split(ehName, ",") 363 for _, p := range nameParts { 364 tagParts := strings.Split(p, "=") 365 if len(tagParts) > 0 { 366 thisTagSet[tagParts[0]] = tagParts[1] 367 } 368 } 369 return thisTagSet 370 } 371 372 func ehMetricNameEscape(metricName string) string { 373 metricName = strings.ToLower(metricName) 374 metricName = strings.Replace(metricName, " ", "_", -1) 375 return fmt.Sprintf("extrahop.application.%v", metricName) 376 }