bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/haproxy_unix.go (about) 1 package collectors 2 3 import ( 4 "encoding/csv" 5 "fmt" 6 "net/http" 7 "strconv" 8 "strings" 9 10 "bosun.org/cmd/scollector/conf" 11 "bosun.org/metadata" 12 "bosun.org/opentsdb" 13 ) 14 15 func init() { 16 registerInit(func(c *conf.Conf) { 17 for _, h := range c.HAProxy { 18 for _, i := range h.Instances { 19 ii := i 20 collectors = append(collectors, &IntervalCollector{ 21 F: func() (opentsdb.MultiDataPoint, error) { 22 if ii.User != "" { 23 return haproxyFetch(ii.User, ii.Password, ii.Tier, ii.URL) 24 } else { 25 return haproxyFetch(h.User, h.Password, ii.Tier, ii.URL) 26 } 27 }, 28 name: fmt.Sprintf("haproxy-%s-%s", ii.Tier, ii.URL), 29 }) 30 } 31 } 32 }) 33 } 34 35 func haproxyFetch(user, pwd, tier, url string) (opentsdb.MultiDataPoint, error) { 36 var md opentsdb.MultiDataPoint 37 var err error 38 const metric = "haproxy" 39 parse := func(v string) (int64, error) { 40 var i int64 41 if v != "" { 42 i, err = strconv.ParseInt(v, 10, 64) 43 if err != nil { 44 return 0, err 45 } 46 return i, nil 47 } 48 return i, nil 49 } 50 51 req, err := http.NewRequest("GET", url, nil) 52 if err != nil { 53 return nil, err 54 } 55 // Close connection after request. Default cached connections will get 56 // failures in the event of server closing idle connections. 57 // See https://github.com/golang/go/issues/8946 58 req.Close = true 59 req.SetBasicAuth(user, pwd) 60 resp, err := http.DefaultClient.Do(req) 61 if err != nil { 62 return nil, err 63 } 64 defer resp.Body.Close() 65 reader := csv.NewReader(resp.Body) 66 records, err := reader.ReadAll() 67 if err != nil { 68 return nil, err 69 } 70 if len(records) < 2 { 71 return nil, nil 72 } 73 // can't rely on number of colums with new (>=1.7) haproxy versions, lets check if there any data 74 if len(records[1]) < 16 { 75 return nil, fmt.Errorf("expected more columns with data. got: %v", len(records[1])) 76 } 77 78 for _, rec := range records[1:] { 79 hType := haproxyType[rec[32]] 80 pxname := rec[0] 81 svname := rec[1] 82 ts := opentsdb.TagSet{"pxname": pxname, "svname": svname, "tier": tier} 83 for i, field := range haproxyCSVMeta { 84 if i >= len(rec) { 85 break 86 } 87 m := strings.Join([]string{metric, hType, field.Name}, ".") 88 value := rec[i] 89 if field.Ignore == true { 90 continue 91 } else if strings.HasPrefix(field.Name, "hrsp") { 92 sp := strings.Split(field.Name, "_") 93 if len(sp) != 2 { 94 return nil, fmt.Errorf("unexpected field name %v in hrsp", field.Name) 95 } 96 ts := ts.Copy().Merge(opentsdb.TagSet{"status_code": sp[1]}) 97 m = strings.Join([]string{metric, hType, sp[0]}, ".") 98 v, err := parse(value) 99 if err != nil { 100 return nil, err 101 } 102 Add(&md, m, v, ts, metadata.Counter, metadata.Response, 103 fmt.Sprintf("The number of http responses with a %v status code.", sp[1])) 104 } else if field.Name == "status" { 105 v, ok := haproxyStatus[value] 106 // Not distinging between MAINT and MAINT via... 107 if !ok { 108 v = 3 109 } 110 Add(&md, m, v, ts, field.RateType, field.Unit, field.Desc) 111 } else if field.Name == "check_status" { 112 if value == "" { 113 continue 114 } 115 v, ok := haproxyCheckStatus[value] 116 if !ok { 117 return nil, fmt.Errorf("unknown check status %v", value) 118 } 119 Add(&md, m, v, ts, field.RateType, field.Unit, field.Desc) 120 } else { 121 v, err := parse(value) 122 if err != nil { 123 return nil, err 124 } 125 Add(&md, m, v, ts, field.RateType, field.Unit, field.Desc) 126 } 127 } 128 } 129 return md, nil 130 } 131 132 // MetricMetaHAProxy is a super-structure which adds a friendly Name, 133 // as well as an indicator on if a metric is to be ignored. 134 type MetricMetaHAProxy struct { 135 Name string 136 Ignore bool 137 MetricMeta 138 } 139 140 var haproxyType = map[string]string{ 141 "0": "frontend", 142 "1": "backend", 143 "2": "server", 144 "3": "listen", 145 } 146 147 var haproxyCheckStatus = map[string]int{ 148 "UNK": 0, 149 "INI": 1, 150 "SOCKERR": 2, 151 "L4OK": 3, 152 "L4TOUT": 4, 153 "L4CON": 5, 154 "L6OK": 6, 155 "L6TOUT": 7, 156 "L6RSP": 8, 157 "L7OK": 9, 158 "L7OKC": 10, 159 "L7TOUT": 11, 160 "L7RSP": 12, 161 "L7STS": 13, 162 } 163 164 var haproxyStatus = map[string]int{ 165 "UP": 0, 166 "DOWN": 1, 167 "NOLB": 2, 168 "MAINT": 3, 169 } 170 171 // A slice of fields which are presented by haproxy's CSV data. 172 // See "CSV format" in http://www.haproxy.org/download/1.5/doc/configuration.txt 173 var haproxyCSVMeta = []MetricMetaHAProxy{ 174 { 175 Name: "pxname", 176 Ignore: true, 177 }, 178 { 179 Name: "svname", 180 Ignore: true, 181 }, 182 { 183 Name: "qcur", 184 MetricMeta: MetricMeta{RateType: metadata.Gauge, 185 Unit: metadata.Request, 186 Desc: "The current queued requests. For the backend this reports the number queued without a server assigned.", 187 }}, 188 { 189 Name: "qmax", 190 MetricMeta: MetricMeta{RateType: metadata.Gauge, 191 Unit: metadata.Request, 192 Desc: "The max value of qcur.", 193 }}, 194 { 195 Name: "scur", 196 MetricMeta: MetricMeta{RateType: metadata.Gauge, 197 Unit: metadata.Session, 198 Desc: "The current number of sessions.", 199 }}, 200 { 201 Name: "smax", 202 MetricMeta: MetricMeta{RateType: metadata.Gauge, 203 Unit: metadata.Session, 204 Desc: "The maximum number of concurrent sessions seen.", 205 }}, 206 { 207 Name: "slim", 208 MetricMeta: MetricMeta{RateType: metadata.Gauge, 209 Unit: metadata.Session, 210 Desc: "The configured session limit.", 211 }}, 212 { 213 Name: "stot", 214 MetricMeta: MetricMeta{RateType: metadata.Counter, 215 Unit: metadata.Session, 216 Desc: "The total number of sessions.", 217 }}, 218 { 219 Name: "bin", 220 MetricMeta: MetricMeta{RateType: metadata.Counter, 221 Unit: metadata.Bytes, 222 Desc: "The number of bytes in.", 223 }}, 224 { 225 Name: "bout", 226 MetricMeta: MetricMeta{RateType: metadata.Counter, 227 Unit: metadata.Bytes, 228 Desc: "The number of bytes out.", 229 }}, 230 { 231 Name: "dreq", 232 MetricMeta: MetricMeta{RateType: metadata.Counter, 233 Unit: metadata.Request, 234 Desc: "The number of requests denied because of security concerns. For tcp this is because of a matched tcp-request content rule. For http this is because of a matched http-request or tarpit rule.", 235 }}, 236 { 237 Name: "dresp", 238 MetricMeta: MetricMeta{RateType: metadata.Counter, 239 Unit: metadata.Response, 240 Desc: "The number of responses denied because of security concerns. For http this is because of a matched http-request rule, or 'option checkcache'.", 241 }}, 242 { 243 Name: "ereq", 244 MetricMeta: MetricMeta{RateType: metadata.Counter, 245 Unit: metadata.Request, 246 Desc: "The number of request errors. Some of the possible causes are: Early termination from the client before the request has been sent, a read error from the client, a client timeout, a client closed connection, various bad requests from the client or the request was tarpitted.", 247 }}, 248 { 249 Name: "econ", 250 MetricMeta: MetricMeta{RateType: metadata.Counter, 251 Unit: metadata.Request, 252 Desc: "The number of number of requests that encountered an error trying to connect to a backend server. The backend stat is the sum of the stat for all servers of that backend, plus any connection errors not associated with a particular server (such as the backend having no active servers).", 253 }}, 254 { 255 Name: "eresp", 256 MetricMeta: MetricMeta{RateType: metadata.Counter, 257 Unit: metadata.Response, 258 Desc: " The number of response errors. srv_abrt will be counted here also. Some errors are: write error on the client socket (won't be counted for the server stat) and failure applying filters to the response.", 259 }}, 260 { 261 Name: "wretr", 262 MetricMeta: MetricMeta{RateType: metadata.Counter, 263 Unit: metadata.Retry, 264 Desc: "The number of times a connection to a server was retried.", 265 }}, 266 { 267 Name: "wredis", 268 MetricMeta: MetricMeta{RateType: metadata.Counter, 269 Unit: metadata.Redispatch, 270 Desc: "number of times a request was redispatched to another server. The server value counts the number of times that server was switched away from.", 271 }}, 272 { 273 Name: "status", 274 MetricMeta: MetricMeta{RateType: metadata.Gauge, 275 Unit: metadata.Weight, 276 Desc: "The current status: 0->UP, 1->Down, 2->NOLB, 3->Maintenance.", 277 }}, 278 { 279 Name: "weight", 280 MetricMeta: MetricMeta{RateType: metadata.Gauge, 281 Unit: metadata.Weight, 282 Desc: "The server weight (server), total weight (backend).", 283 }}, 284 { 285 Name: "act", 286 MetricMeta: MetricMeta{RateType: metadata.Gauge, 287 Unit: metadata.Server, 288 Desc: "If the server is active in the case of servers, or number of active servers in the case of a backend.", 289 }}, 290 { 291 Name: "bck", 292 MetricMeta: MetricMeta{RateType: metadata.Gauge, 293 Unit: metadata.Server, 294 Desc: "If the server is a backup in the case of servers, or number of backup servers in the case of a backend.", 295 }}, 296 { 297 Name: "chkfail", 298 MetricMeta: MetricMeta{RateType: metadata.Counter, 299 Unit: metadata.Check, 300 Desc: "The number of failed checks. (Only counts checks failed when the server is up.)", 301 }}, 302 { 303 Name: "chkdown", 304 MetricMeta: MetricMeta{RateType: metadata.Counter, 305 Unit: metadata.Transition, 306 Desc: "The number of UP->DOWN transitions. The backend counter counts transitions to the whole backend being down, rather than the sum of the counters for each server.", 307 }}, 308 { 309 Name: "lastchg", 310 MetricMeta: MetricMeta{RateType: metadata.Gauge, 311 Unit: metadata.Second, 312 Desc: "The number of seconds since the last UP<->DOWN transition.", 313 }}, 314 { 315 Name: "downtime", 316 MetricMeta: MetricMeta{RateType: metadata.Counter, 317 Unit: metadata.Second, 318 Desc: "The total downtime in seconds. The value for the backend is the downtime for the whole backend, not the sum of the server downtime.", 319 }}, 320 { 321 Name: "qlimit", 322 MetricMeta: MetricMeta{RateType: metadata.Gauge, 323 //Don't know the unit 324 Desc: "The configured maxqueue for the server, or nothing in the value is 0 (default, meaning no limit)", 325 }}, 326 { 327 Name: "pid", 328 Ignore: true, 329 // Not a series or tag so skipping this. 330 }, 331 { 332 Name: "iid", 333 Ignore: true, 334 // Not a series or tag so skipping this. 335 }, 336 { 337 Name: "sid", 338 Ignore: true, 339 // Not a series or tag so skipping this. 340 }, 341 { 342 Name: "throttle", 343 MetricMeta: MetricMeta{RateType: metadata.Gauge, 344 Unit: metadata.Pct, 345 Desc: "The current throttle percentage for the server, when slowstart is active, or no value if not in slowstart.", 346 }}, 347 { 348 Name: "lbtot", 349 MetricMeta: MetricMeta{RateType: metadata.Counter, 350 //Don't know the unit 351 Desc: "The total number of times a server was selected, either for new sessions, or when re-dispatching. The server counter is the number of times that server was selected.", 352 }}, 353 { 354 Name: "tracked", 355 Ignore: true, 356 // This could be a tag, but I am have no use for it. 357 }, 358 { 359 Name: "type", 360 Ignore: true, 361 // This could be a tag, but I am have no use for it. 362 }, 363 { 364 Name: "rate", 365 Ignore: true, 366 // This could be a tag, but I am have no use for it. 367 }, 368 { 369 Name: "rate_lim", 370 MetricMeta: MetricMeta{RateType: metadata.Gauge, 371 Unit: metadata.Session, 372 Desc: "The configured limit on new sessions per second.", 373 }}, 374 { 375 Name: "rate_max", 376 MetricMeta: MetricMeta{RateType: metadata.Counter, 377 Unit: metadata.Session, 378 Desc: "The max number of new sessions per second.", 379 }}, 380 { 381 Name: "check_status", 382 MetricMeta: MetricMeta{RateType: metadata.Gauge, 383 Unit: metadata.StatusCode, 384 Desc: "The status of last health check, one of: 0 -> unknown, 1 -> initializing, 2 -> socket error, 3 -> The check passed on layer 4, but no upper layers testing enabled, 4 -> layer 1-4 timeout, 5 -> layer 1-4 connection problem for example 'Connection refused' (tcp rst) or 'No route to host' (icmp), 6 -> check passed on layer 6, 7 -> layer 6 (SSL) timeout, 8 -> layer 6 invalid response - protocol error, 9 -> check passed on layer 7, 10 -> check conditionally passed on layer 7 for example 404 with disable-on-404, 11 -> layer 7 (HTTP/SMTP) timeout, 12 -> layer 7 invalid response - protocol error, 13 -> layer 7 response error, for example HTTP 5xx.", 385 }}, 386 { 387 Name: "check_code", 388 MetricMeta: MetricMeta{RateType: metadata.Gauge, 389 Unit: metadata.StatusCode, 390 Desc: "The layer5-7 code, if available.", 391 }}, 392 { 393 Name: "check_duration", 394 MetricMeta: MetricMeta{RateType: metadata.Gauge, 395 Unit: metadata.MilliSecond, 396 Desc: "The time in ms it took to finish last health check.", 397 }}, 398 { 399 Name: "hrsp_1xx", 400 //These are transformed and aggregated: 1xx, 2xx, etc will be a tag. 401 }, 402 { 403 Name: "hrsp_2xx", 404 }, 405 { 406 Name: "hrsp_3xx", 407 }, 408 { 409 Name: "hrsp_4xx", 410 }, 411 { 412 Name: "hrsp_5xx", 413 }, 414 { 415 Name: "hrsp_other", 416 }, 417 { 418 Name: "hanafail", 419 // The docs just say "failed health check details", so skipping this 420 // for now 421 }, 422 { 423 Name: "req_rate", 424 // Not needed since data store can derive the rate from req_tot 425 }, 426 { 427 Name: "req_rate_max", 428 MetricMeta: MetricMeta{RateType: metadata.Gauge, 429 Unit: metadata.Request, 430 Desc: "The max number of HTTP requests per second observed.", 431 }}, 432 { 433 Name: "req_tot", 434 MetricMeta: MetricMeta{RateType: metadata.Counter, 435 Unit: metadata.Request, 436 Desc: "The number of HTTP requests received.", 437 }}, 438 { 439 Name: "cli_abrt", 440 MetricMeta: MetricMeta{RateType: metadata.Counter, 441 Unit: metadata.Abort, 442 Desc: "The number of data transfers aborted by the client.", 443 }}, 444 { 445 Name: "srv_abrt", 446 MetricMeta: MetricMeta{RateType: metadata.Counter, 447 Unit: metadata.Abort, 448 Desc: "The number of data transfers aborted by the server.", 449 }}, 450 { 451 Name: "comp_in", 452 MetricMeta: MetricMeta{RateType: metadata.Counter, 453 Unit: metadata.Bytes, 454 Desc: "The number of HTTP response bytes fed to the compressor.", 455 }}, 456 { 457 Name: "comp_out", 458 MetricMeta: MetricMeta{RateType: metadata.Counter, 459 Unit: metadata.Bytes, 460 Desc: "The number of HTTP response bytes emitted by the compressor.", 461 }}, 462 { 463 Name: "comp_byp", 464 MetricMeta: MetricMeta{RateType: metadata.Counter, 465 Unit: metadata.Bytes, 466 Desc: "The number of bytes that bypassed the HTTP compressor (CPU/BW limit).", 467 }}, 468 { 469 Name: "comp_rsp", 470 MetricMeta: MetricMeta{RateType: metadata.Counter, 471 Unit: metadata.Response, 472 Desc: "The number of HTTP responses that were compressed.", 473 }}, 474 { 475 Name: "lastsess", 476 MetricMeta: MetricMeta{RateType: metadata.Gauge, 477 Unit: metadata.Second, 478 Desc: "The number of seconds since last session assigned to server/backend.", 479 }}, 480 { 481 Name: "last_chk", 482 Ignore: true, 483 // Not a series or tag so skipping this. 484 }, 485 { 486 Name: "last_agt", 487 Ignore: true, 488 // Not a series or tag so skipping this. 489 }, 490 { 491 Name: "qtime", 492 MetricMeta: MetricMeta{RateType: metadata.Gauge, 493 Unit: metadata.MilliSecond, 494 Desc: "The average queue time in ms over the 1024 last requests.", 495 }}, 496 { 497 Name: "ctime", 498 MetricMeta: MetricMeta{RateType: metadata.Gauge, 499 Unit: metadata.MilliSecond, 500 Desc: "The average connect time in ms over the 1024 last requests.", 501 }}, 502 { 503 Name: "rtime", 504 MetricMeta: MetricMeta{RateType: metadata.Gauge, 505 Unit: metadata.MilliSecond, 506 Desc: "The average response time in ms over the 1024 last requests (0 for TCP).", 507 }}, 508 { 509 Name: "ttime", 510 MetricMeta: MetricMeta{RateType: metadata.Gauge, 511 Unit: metadata.MilliSecond, 512 Desc: "The average total session time in ms over the 1024 last requests.", 513 }}, 514 { 515 Name: "agent_status", 516 Ignore: true, 517 // Not a series or tag so skipping this. 518 }, 519 { 520 Name: "agent_code", 521 Ignore: true, 522 // Unused 523 }, 524 { 525 Name: "agent_duration", 526 MetricMeta: MetricMeta{RateType: metadata.Gauge, 527 Unit: metadata.MilliSecond, 528 Desc: "Time in ms taken to finish last check", 529 }}, 530 { 531 Name: "check_desc", 532 Ignore: true, 533 // Can't parse 534 }, 535 { 536 Name: "agent_desc", 537 Ignore: true, 538 // Can't parse 539 }, 540 { 541 Name: "check_rise", 542 MetricMeta: MetricMeta{RateType: metadata.Gauge, 543 Unit: metadata.StatusCode, 544 Desc: "Server's 'rise' parameter used by checks", 545 }}, 546 { 547 Name: "check_fall", 548 MetricMeta: MetricMeta{RateType: metadata.Gauge, 549 Unit: metadata.StatusCode, 550 Desc: "Server's 'fall' parameter used by checks", 551 }}, 552 { 553 Name: "check_health", 554 MetricMeta: MetricMeta{RateType: metadata.Gauge, 555 Unit: metadata.StatusCode, 556 Desc: "Server's health check value", 557 }}, 558 { 559 Name: "agent_rise", 560 MetricMeta: MetricMeta{RateType: metadata.Gauge, 561 Unit: metadata.StatusCode, 562 Desc: "Agents's 'rise' parameter", 563 }}, 564 { 565 Name: "agent_fall", 566 MetricMeta: MetricMeta{RateType: metadata.Gauge, 567 Unit: metadata.StatusCode, 568 Desc: "Agents's 'fall' parameter", 569 }}, 570 { 571 Name: "agent_health", 572 MetricMeta: MetricMeta{RateType: metadata.Gauge, 573 Unit: metadata.StatusCode, 574 Desc: "Agents's 'health' parameter", 575 }}, 576 { 577 Name: "addr", 578 Ignore: true, 579 // Can't parse 580 }, 581 { 582 Name: "cookie", 583 Ignore: true, 584 // Can't parse 585 }, 586 { 587 Name: "mode", 588 Ignore: true, 589 // Can't parse 590 }, 591 { 592 Name: "algo", 593 Ignore: true, 594 // Can't parse 595 }, 596 { 597 Name: "conn_rate", 598 MetricMeta: MetricMeta{RateType: metadata.Gauge, 599 Unit: metadata.Request, 600 Desc: "Number of connections over the last elapsed second", 601 }}, 602 { 603 Name: "conn_rate_max", 604 MetricMeta: MetricMeta{RateType: metadata.Gauge, 605 Unit: metadata.Request, 606 Desc: "Highest known conn_rate", 607 }}, 608 { 609 Name: "conn_tot", 610 MetricMeta: MetricMeta{RateType: metadata.Gauge, 611 Unit: metadata.Request, 612 Desc: "Cumulative number of connections", 613 }}, 614 { 615 Name: "intercepted", 616 MetricMeta: MetricMeta{RateType: metadata.Gauge, 617 Unit: metadata.Request, 618 Desc: "Cumulative number of intercepted requests (monitor, stats)", 619 }}, 620 { 621 Name: "dcon", 622 MetricMeta: MetricMeta{RateType: metadata.Gauge, 623 Unit: metadata.Request, 624 Desc: "Requests denied by 'tcp-request connection' rules", 625 }}, 626 { 627 Name: "dses", 628 MetricMeta: MetricMeta{RateType: metadata.Gauge, 629 Unit: metadata.Request, 630 Desc: "Requests denied by 'tcp-request session' rules", 631 }}, 632 }