bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/riak.go (about) 1 package collectors 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "net/http" 7 "net/url" 8 "strings" 9 10 "bosun.org/metadata" 11 "bosun.org/opentsdb" 12 ) 13 14 var riakMeta = map[string]MetricMeta{ 15 "pbc_connects_total": { 16 Metric: "pbc_connections", 17 RateType: metadata.Counter, 18 Unit: metadata.Connection, 19 Desc: "Total number of Protocol Buffers connections made.", 20 }, 21 "read_repairs_total": { 22 Metric: "read_repairs", 23 RateType: metadata.Counter, 24 Unit: metadata.Operation, 25 Desc: "Total number of Read Repairs this node has coordinated.", 26 }, 27 "read_repairs_primary_outofdate_count": { 28 Metric: "read_repairs_primary_outofdate", 29 RateType: metadata.Counter, 30 Unit: metadata.Operation, 31 Desc: "Total number of read repair operations performed on primary vnodes due to stale replicas.", 32 }, 33 "read_repairs_primary_notfound_count": { 34 Metric: "read_repairs_primary_notfound", 35 RateType: metadata.Counter, 36 Unit: metadata.Operation, 37 Desc: "Total number of read repair operations performed on primary vnodes due to missing replicas.", 38 }, 39 "read_repairs_fallback_outofdate_count": { 40 Metric: "read_repairs_fallback_outofdate", 41 RateType: metadata.Counter, 42 Unit: metadata.Operation, 43 Desc: "Total number of read repair operations performed on fallback vnodes due to stale replicas.", 44 }, 45 "read_repairs_fallback_notfound_count": { 46 Metric: "read_repairs_fallback_notfound", 47 RateType: metadata.Counter, 48 Unit: metadata.Operation, 49 Desc: "Total number of read repair operations performed on fallback vnodes due to missing replicas.", 50 }, 51 "coord_redirs_total": { 52 Metric: "coord_redirs", 53 RateType: metadata.Counter, 54 Unit: metadata.Operation, 55 Desc: "Total number of requests this node has redirected to other nodes for coordination.", 56 }, 57 "precommit_fail": { 58 Metric: "precommit_fail", 59 RateType: metadata.Counter, 60 Unit: metadata.Event, 61 Desc: "Total number of pre-commit hook failures.", 62 }, 63 "postcommit_fail": { 64 Metric: "postcommit_fail", 65 RateType: metadata.Counter, 66 Unit: metadata.Event, 67 Desc: "Total number of post-commit hook failures.", 68 }, 69 "executing_mappers": { 70 Metric: "executing_mappers", 71 RateType: metadata.Gauge, 72 Unit: metadata.Process, 73 }, 74 "pipeline_create_count": { 75 Metric: "pipeline.create.count", 76 RateType: metadata.Counter, 77 Unit: metadata.Process, 78 Desc: "The total number of pipelines created since the node was started.", 79 }, 80 "pipeline_create_error_count": { 81 Metric: "pipeline.create.errors", 82 RateType: metadata.Counter, 83 Unit: metadata.Event, 84 Desc: "The total number of pipeline creation errors since the node was started.", 85 }, 86 "pipeline_active": { 87 Metric: "active", 88 TagSet: opentsdb.TagSet{"type": "pbc"}, 89 RateType: metadata.Gauge, 90 Unit: metadata.Process, 91 Desc: "The number of pipelines active in the last 60 seconds.", 92 }, 93 "index_fsm_active": { 94 Metric: "active", 95 TagSet: opentsdb.TagSet{"type": "index"}, 96 RateType: metadata.Gauge, 97 Unit: metadata.Process, 98 Desc: "Number of active Secondary Index FSMs.", 99 }, 100 "list_fsm_active": { 101 Metric: "active", 102 TagSet: opentsdb.TagSet{"type": "list"}, 103 RateType: metadata.Gauge, 104 Unit: metadata.Process, 105 Desc: "Number of active Keylisting FSMs.", 106 }, 107 108 "memory_total": { 109 Metric: "memory", 110 TagSet: opentsdb.TagSet{"type": "total"}, 111 RateType: metadata.Gauge, 112 Unit: metadata.Bytes, 113 Desc: "Total allocated memory (sum of processes and system).", 114 }, 115 "memory_processes": { 116 Metric: "memory", 117 TagSet: opentsdb.TagSet{"type": "processes"}, 118 RateType: metadata.Gauge, 119 Unit: metadata.Bytes, 120 Desc: "Total amount of memory allocated for Erlang processes.", 121 }, 122 "memory_processes_used": { 123 Metric: "memory", 124 TagSet: opentsdb.TagSet{"type": "processes_used"}, 125 RateType: metadata.Gauge, 126 Unit: metadata.Bytes, 127 Desc: "Total amount of memory used by Erlang processes.", 128 }, 129 "memory_system": { 130 Metric: "memory", 131 TagSet: opentsdb.TagSet{"type": "system"}, 132 RateType: metadata.Gauge, 133 Unit: metadata.Bytes, 134 Desc: "Total allocated memory that is not directly related to an Erlang process.", 135 }, 136 "memory_system_used": { 137 Metric: "memory", 138 TagSet: opentsdb.TagSet{"type": "system_used"}, 139 RateType: metadata.Gauge, 140 Unit: metadata.Bytes, 141 }, 142 "memory_atom": { 143 Metric: "memory", 144 TagSet: opentsdb.TagSet{"type": "atom"}, 145 RateType: metadata.Gauge, 146 Unit: metadata.Bytes, 147 Desc: "Total amount of memory currently allocated for atom storage.", 148 }, 149 "memory_atom_used": { 150 Metric: "memory", 151 TagSet: opentsdb.TagSet{"type": "atom_used"}, 152 RateType: metadata.Gauge, 153 Unit: metadata.Bytes, 154 Desc: "Total amount of memory currently used for atom storage.", 155 }, 156 "memory_binary": { 157 Metric: "memory", 158 TagSet: opentsdb.TagSet{"type": "binary"}, 159 RateType: metadata.Gauge, 160 Unit: metadata.Bytes, 161 Desc: "Total amount of memory used for binaries.", 162 }, 163 "memory_code": { 164 Metric: "memory", 165 TagSet: opentsdb.TagSet{"type": "code"}, 166 RateType: metadata.Gauge, 167 Unit: metadata.Bytes, 168 Desc: "Total amount of memory allocated for Erlang code.", 169 }, 170 "memory_ets": { 171 Metric: "memory", 172 TagSet: opentsdb.TagSet{"type": "ets"}, 173 RateType: metadata.Gauge, 174 Unit: metadata.Bytes, 175 Desc: "Total memory allocated for Erlang Term Storage.", 176 }, 177 "mem_total": { 178 Metric: "memory", 179 TagSet: opentsdb.TagSet{"type": "available"}, 180 RateType: metadata.Gauge, 181 Unit: metadata.Bytes, 182 Desc: "Total available system memory.", 183 }, 184 "mem_allocated": { 185 Metric: "memory", 186 TagSet: opentsdb.TagSet{"type": "allocated"}, 187 RateType: metadata.Gauge, 188 Unit: metadata.Bytes, 189 Desc: "Total memory allocated for this node.", 190 }, 191 192 "vnode_index_reads_total": { 193 Metric: "vnode.index.requests", 194 RateType: metadata.Counter, 195 Unit: metadata.Operation, 196 Desc: "Total number of local replicas participating in secondary index reads.", 197 }, 198 "vnode_index_writes_total": { 199 Metric: "vnode.index.requests", 200 TagSet: opentsdb.TagSet{"type": "write"}, 201 RateType: metadata.Counter, 202 Unit: metadata.Operation, 203 Desc: "Total number of local replicas participating in secondary index writes.", 204 }, 205 "vnode_index_deletes_total": { 206 Metric: "vnode.index.requests", 207 TagSet: opentsdb.TagSet{"type": "delete"}, 208 RateType: metadata.Counter, 209 Unit: metadata.Operation, 210 Desc: "Total number of local replicas participating in secondary index deletes.", 211 }, 212 "vnode_index_writes_postings_total": { 213 Metric: "vnode.index.requests", 214 TagSet: opentsdb.TagSet{"type": "write_post"}, 215 RateType: metadata.Counter, 216 Unit: metadata.Operation, 217 Desc: "Total number of individual secondary index values written.", 218 }, 219 "vnode_index_deletes_postings_total": { 220 Metric: "vnode.index.requests", 221 TagSet: opentsdb.TagSet{"type": "delete_post"}, 222 RateType: metadata.Counter, 223 Unit: metadata.Operation, 224 Desc: "Total number of individual secondary index values deleted.", 225 }, 226 227 "vnode_gets_total": { 228 Metric: "vnode.requests", 229 TagSet: opentsdb.TagSet{"type": "get"}, 230 RateType: metadata.Counter, 231 Unit: metadata.Operation, 232 Desc: "Total number of GETs coordinated by local vnodes.", 233 }, 234 "vnode_puts_total": { 235 Metric: "vnode.requests", 236 TagSet: opentsdb.TagSet{"type": "put"}, 237 RateType: metadata.Counter, 238 Unit: metadata.Operation, 239 Desc: "Total number of PUTS coordinated by local vnodes.", 240 }, 241 "node_gets_total": { 242 Metric: "node.requests", 243 TagSet: opentsdb.TagSet{"type": "get"}, 244 RateType: metadata.Counter, 245 Unit: metadata.Operation, 246 Desc: "Total number of GETs coordinated by this node, including GETs to non-local vnodes.", 247 }, 248 "node_puts_total": { 249 Metric: "node.requests", 250 TagSet: opentsdb.TagSet{"type": "put"}, 251 RateType: metadata.Counter, 252 Unit: metadata.Operation, 253 Desc: "Total number of PUTs coordinated by this node, including PUTs to non-local vnodes.", 254 }, 255 "node_get_fsm_time_mean": { 256 Metric: "node.latency.mean", 257 TagSet: opentsdb.TagSet{"type": "get"}, 258 RateType: metadata.Gauge, 259 Unit: metadata.Second, 260 Desc: "Mean time between reception of client GET request and subsequent response to client.", 261 }, 262 "node_put_fsm_time_mean": { 263 Metric: "node.latency.mean", 264 TagSet: opentsdb.TagSet{"type": "put"}, 265 RateType: metadata.Gauge, 266 Unit: metadata.Second, 267 Desc: "Mean time between reception of client PUT request and subsequent response to client.", 268 }, 269 "node_get_fsm_time_median": { 270 Metric: "node.latency.median", 271 TagSet: opentsdb.TagSet{"type": "get"}, 272 RateType: metadata.Gauge, 273 Unit: metadata.Second, 274 Desc: "Median time between reception of client GET request and subsequent response to client.", 275 }, 276 "node_put_fsm_time_median": { 277 Metric: "node.latency.median", 278 TagSet: opentsdb.TagSet{"type": "put"}, 279 RateType: metadata.Gauge, 280 Unit: metadata.Second, 281 Desc: "Median time between reception of client PUT request and subsequent response to client.", 282 }, 283 "node_get_fsm_time_95": { 284 Metric: "node.latency.95th", 285 TagSet: opentsdb.TagSet{"type": "get"}, 286 RateType: metadata.Gauge, 287 Unit: metadata.Second, 288 Desc: "95th percentile time between reception of client GET request and subsequent response to client.", 289 }, 290 "node_put_fsm_time_95": { 291 Metric: "node.latency.95th", 292 TagSet: opentsdb.TagSet{"type": "put"}, 293 RateType: metadata.Gauge, 294 Unit: metadata.Second, 295 Desc: "95th percentile time between reception of client PUT request and subsequent response to client.", 296 }, 297 "node_get_fsm_time_99": { 298 Metric: "node.latency.99th", 299 TagSet: opentsdb.TagSet{"type": "get"}, 300 RateType: metadata.Gauge, 301 Unit: metadata.Second, 302 Desc: "99th percentile time between reception of client GET request and subsequent response to client.", 303 }, 304 "node_put_fsm_time_99": { 305 Metric: "node.latency.99th", 306 TagSet: opentsdb.TagSet{"type": "put"}, 307 RateType: metadata.Gauge, 308 Unit: metadata.Second, 309 Desc: "99th percentile time between reception of client PUT request and subsequent response to client.", 310 }, 311 "node_get_fsm_time_100": { 312 Metric: "node.latency.100th", 313 TagSet: opentsdb.TagSet{"type": "get"}, 314 RateType: metadata.Gauge, 315 Unit: metadata.Second, 316 Desc: "100th percentile time between reception of client GET request and subsequent response to client.", 317 }, 318 "node_put_fsm_time_100": { 319 Metric: "node.latency.100th", 320 TagSet: opentsdb.TagSet{"type": "put"}, 321 RateType: metadata.Gauge, 322 Unit: metadata.Second, 323 Desc: "100th percentile time between reception of client PUT request and subsequent response to client.", 324 }, 325 "node_get_fsm_objsize_mean": { 326 Metric: "node.objsize.mean", 327 TagSet: opentsdb.TagSet{"type": "get"}, 328 RateType: metadata.Gauge, 329 Unit: metadata.Bytes, 330 Desc: "Mean object size encountered by this node within the last minute.", 331 }, 332 "node_get_fsm_objsize_median": { 333 Metric: "node.objsize.median", 334 TagSet: opentsdb.TagSet{"type": "get"}, 335 RateType: metadata.Gauge, 336 Unit: metadata.Bytes, 337 Desc: "Median object size encountered by this node within the last minute.", 338 }, 339 "node_get_fsm_objsize_95": { 340 Metric: "node.objsize.95th", 341 TagSet: opentsdb.TagSet{"type": "get"}, 342 RateType: metadata.Gauge, 343 Unit: metadata.Bytes, 344 Desc: "95th percentile object size encountered by this node within the last minute.", 345 }, 346 "node_get_fsm_objsize_99": { 347 Metric: "node.objsize.99th", 348 TagSet: opentsdb.TagSet{"type": "get"}, 349 RateType: metadata.Gauge, 350 Unit: metadata.Bytes, 351 Desc: "99th percentile object size encountered by this node within the last minute.", 352 }, 353 "node_get_fsm_objsize_100": { 354 Metric: "node.objsize.100th", 355 TagSet: opentsdb.TagSet{"type": "get"}, 356 RateType: metadata.Gauge, 357 Unit: metadata.Bytes, 358 Desc: "100th percentile object size encountered by this node within the last minute.", 359 }, 360 "node_get_fsm_siblings_mean": { 361 Metric: "node.siblings.mean", 362 TagSet: opentsdb.TagSet{"type": "get"}, 363 RateType: metadata.Gauge, 364 Unit: metadata.Count, 365 Desc: "Mean number of siblings encountered during all GET operations by this node within the last minute.", 366 }, 367 "node_get_fsm_siblings_median": { 368 Metric: "node.siblings.median", 369 TagSet: opentsdb.TagSet{"type": "get"}, 370 RateType: metadata.Gauge, 371 Unit: metadata.Count, 372 Desc: "Median number of siblings encountered during all GET operations by this node within the last minute.", 373 }, 374 "node_get_fsm_siblings_95": { 375 Metric: "node.siblings.95th", 376 TagSet: opentsdb.TagSet{"type": "get"}, 377 RateType: metadata.Gauge, 378 Unit: metadata.Count, 379 Desc: "95th percentile of siblings encountered during all GET operations by this node within the last minute.", 380 }, 381 "node_get_fsm_siblings_99": { 382 Metric: "node.siblings.99th", 383 TagSet: opentsdb.TagSet{"type": "get"}, 384 RateType: metadata.Gauge, 385 Unit: metadata.Count, 386 Desc: "99th percentile of siblings encountered during all GET operations by this node within the last minute.", 387 }, 388 "node_get_fsm_siblings_100": { 389 Metric: "node.siblings.100th", 390 TagSet: opentsdb.TagSet{"type": "get"}, 391 RateType: metadata.Gauge, 392 Unit: metadata.Count, 393 Desc: "100th percentile of siblings encountered during all GET operations by this node within the last minute.", 394 }, 395 "node_get_fsm_rejected_total": { 396 Metric: "node.requests.rejected", 397 TagSet: opentsdb.TagSet{"type": "get"}, 398 RateType: metadata.Counter, 399 Unit: metadata.Event, 400 Desc: "Total number of GET FSMs rejected by Sidejob's overload protection.", 401 }, 402 "node_put_fsm_rejected_total": { 403 Metric: "node.requests.rejected", 404 TagSet: opentsdb.TagSet{"type": "put"}, 405 RateType: metadata.Counter, 406 Unit: metadata.Event, 407 Desc: "Total number of PUT FSMs rejected by Sidejob's overload protection.", 408 }, 409 "ring_num_partitions": { 410 Metric: "ring_num_partitions", 411 RateType: metadata.Gauge, 412 Unit: metadata.Count, 413 Desc: "The number of partitions in the ring.", 414 }, 415 "ring_creation_size": { 416 Metric: "ring.creation_size", 417 RateType: metadata.Gauge, 418 Unit: metadata.Count, 419 Desc: "Ring size this cluster was created with.", 420 }, 421 "cpu_nprocs": { 422 Metric: "cpu.nprocs", 423 RateType: metadata.Gauge, 424 Unit: metadata.Count, 425 Desc: "Number of operating system processes.", 426 }, 427 "cpu_avg1": { 428 Metric: "cpu.avg1", 429 RateType: metadata.Gauge, 430 Unit: metadata.Load, 431 Desc: "The average number of active processes for the last 1 minute (equivalent to top(1) command’s load average when divided by 256()).", 432 }, 433 "cpu_avg5": { 434 Metric: "cpu.avg5", 435 RateType: metadata.Gauge, 436 Unit: metadata.Load, 437 Desc: "The average number of active processes for the last 5 minutes (equivalent to top(1) command’s load average when divided by 256()).", 438 }, 439 "cpu_avg15": { 440 Metric: "cpu.avg15", 441 RateType: metadata.Gauge, 442 Unit: metadata.Load, 443 Desc: "The average number of active processes for the last 15 minutes (equivalent to top(1) command’s load average when divided by 256()).", 444 }, 445 "riak_search_vnodeq_total": { 446 Metric: "search.vnodeq", 447 RateType: metadata.Counter, 448 Unit: metadata.Event, 449 Desc: "Total number of unprocessed messages all vnode message queues in the Riak Search subsystem have received on this node since it was started.", 450 }, 451 "riak_search_vnodes_running": { 452 Metric: "search.vnodes_running", 453 RateType: metadata.Gauge, 454 Unit: metadata.Process, 455 Desc: "Total number of vnodes currently running in the Riak Search subsystem.", 456 }, 457 } 458 459 func init() { 460 collectors = append(collectors, &IntervalCollector{F: c_riak, Enable: enableRiak}) 461 } 462 463 const ( 464 localRiakURL string = "http://localhost:8098/stats" 465 ) 466 467 func Riak(s string) error { 468 u, err := url.Parse(s) 469 if err != nil { 470 return err 471 } 472 collectors = append(collectors, 473 &IntervalCollector{ 474 F: func() (opentsdb.MultiDataPoint, error) { 475 return riak(s) 476 }, 477 name: fmt.Sprintf("riak-%s", u.Host), 478 }) 479 return nil 480 } 481 482 func enableRiak() bool { 483 return enableURL(localRiakURL)() 484 } 485 486 func c_riak() (opentsdb.MultiDataPoint, error) { 487 return riak(localRiakURL) 488 } 489 490 func riak(s string) (opentsdb.MultiDataPoint, error) { 491 var md opentsdb.MultiDataPoint 492 res, err := http.Get(s) 493 if err != nil { 494 return nil, err 495 } 496 defer res.Body.Close() 497 var r map[string]interface{} 498 if err := json.NewDecoder(res.Body).Decode(&r); err != nil { 499 return nil, err 500 } 501 for k, v := range r { 502 if m, ok := riakMeta[k]; ok { 503 if v == "undefined" { 504 continue 505 } 506 if strings.HasPrefix(m.Metric, "node.latency") { 507 if nl, ok := v.(float64); ok { 508 v = nl / 1000000 509 } else { 510 err := fmt.Errorf("riak: bad integer %s in metric '%s'", v, m.Metric) 511 return nil, err 512 } 513 } 514 Add(&md, "riak."+m.Metric, v, m.TagSet, m.RateType, m.Unit, m.Desc) 515 } else if k == "connected_nodes" { 516 nodes, ok := v.([]interface{}) 517 // 'connected_nodes' array can be empty 518 if !ok { 519 err := fmt.Errorf("riak: unexpected content or type for 'connected_nodes' metric array") 520 return nil, err 521 } 522 Add(&md, "riak.connected_nodes", len(nodes), nil, metadata.Gauge, metadata.Count, descConNodes) 523 } else if k == "ring_members" { 524 ringMembers, ok := v.([]interface{}) 525 // at least one ring member must always exist 526 if !ok || len(ringMembers) < 1 { 527 err := fmt.Errorf("riak: unexpected content or type for 'ring_members' metric array") 528 return nil, err 529 } 530 Add(&md, "riak.ring_members", len(ringMembers), nil, metadata.Gauge, metadata.Count, descRingMembers) 531 } 532 } 533 return md, nil 534 } 535 536 const ( 537 descConNodes = "Count of nodes that this node is aware of at this time." 538 descRingMembers = "Count of nodes that are members of the ring." 539 )