bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/redis_linux.go (about) 1 package collectors 2 3 import ( 4 "fmt" 5 "regexp" 6 "strconv" 7 "strings" 8 "time" 9 10 "github.com/garyburd/redigo/redis" 11 12 "bosun.org/metadata" 13 "bosun.org/opentsdb" 14 "bosun.org/util" 15 ) 16 17 func init() { 18 collectors = append(collectors, &IntervalCollector{F: c_redis, init: redisInit}) 19 } 20 21 var redisMeta = map[string]MetricMeta{ // http://redis.io/commands/info) 22 // Persistence Section 23 // AOF 24 "aof_enabled": { 25 RateType: metadata.Gauge, 26 Unit: metadata.Enabled, 27 Desc: "AOF Enabled indicates that Append Only File logging is activated.", 28 }, 29 "aof_current_size": { 30 RateType: metadata.Gauge, 31 Unit: metadata.Bytes, 32 Desc: "The current file size of the AOF (Append Only File).", 33 }, 34 "aof_rewrite_in_progress": { 35 RateType: metadata.Gauge, 36 Unit: metadata.InProgress, 37 Desc: "Rewrite in progress indicates that AOF (Append Only File) logging is activated.", 38 }, 39 "aof_rewrite_scheduled": { 40 RateType: metadata.Gauge, 41 Unit: metadata.Scheduled, 42 Desc: "AOF rewrite scheduled means an Append Only file rewrite operation will be scheduled once the on-going RDB save is complete.", 43 }, 44 "aof_last_rewrite_time_sec": { 45 RateType: metadata.Gauge, 46 Unit: metadata.Second, 47 Desc: "The duration of the last AOF (Append Only file) rewrite operation in seconds.", 48 }, 49 "aof_current_rewrite_time_sec": { 50 RateType: metadata.Gauge, 51 Unit: metadata.Second, 52 Desc: "The duration of the ongoing AOF (Append Only file) rewrite operation in seconds -- if there is one.", 53 }, 54 "aof_last_bgrewrite_status": { 55 RateType: metadata.Gauge, 56 Unit: metadata.Bool, 57 Desc: "The status of the last AOF (Append Only File) rewrite opperation.", 58 }, 59 // RDB 60 "rdb_bgsave_in_progress": { 61 RateType: metadata.Gauge, 62 Unit: metadata.InProgress, 63 Desc: "BGSAVE in progress indicates if a RDB save is on-going.", 64 }, 65 "rdb_changes_since_last_save": { 66 RateType: metadata.Gauge, 67 Unit: metadata.Change, 68 Desc: "The number of operations that produced some kind of changes in the dataset since the last time either SAVE or BGSAVE was called.", 69 }, 70 "rdb_last_bgsave_status": { 71 RateType: metadata.Gauge, 72 Unit: metadata.Bool, 73 Desc: "The Status of the last RDB save operation.", 74 }, 75 "rdb_last_bgsave_time_sec": { 76 RateType: metadata.Gauge, 77 Unit: metadata.Second, 78 Desc: "The duration of the last RDB save operation.", 79 }, 80 "rdb_current_bgsave_time_sec": { 81 RateType: metadata.Gauge, 82 Unit: metadata.Second, 83 Desc: "The duration of the ongoing RDB save operation -- if there is one.", 84 }, 85 "rdb_last_save_time": { 86 RateType: metadata.Gauge, 87 Unit: metadata.Timestamp, 88 Desc: "The epoch-based timestamp of last successful RDB save.", 89 }, 90 91 // Clients Section 92 "blocked_clients": { 93 RateType: metadata.Gauge, 94 Unit: metadata.Client, 95 Desc: "The number of clients pending on a blocking call (BLPOP, BRPOP, BRPOPLPUSH).", 96 }, 97 "connected_clients": { 98 RateType: metadata.Gauge, 99 Unit: metadata.Connection, 100 Desc: "The number of client connections (excluding connections from slaves).", 101 }, 102 "client_biggest_input_buf": { 103 RateType: metadata.Gauge, 104 Unit: metadata.Count, // Need to figure out what this is, bytes? 105 Desc: "The biggest input buffer among current client connections.", 106 }, 107 "client_longest_output_list": { 108 RateType: metadata.Gauge, 109 Unit: metadata.Count, // Need to figure out what this is, length? 110 Desc: "The longest output list among current client connections.", 111 }, 112 113 // Replication Sections 114 "connected_slaves": { 115 RateType: metadata.Gauge, 116 Unit: metadata.Slave, 117 Desc: "The number of connected slaves.", 118 }, 119 "master_link_status": { 120 RateType: metadata.Gauge, 121 Unit: metadata.Ok, 122 Desc: "The up/down status of the link to the master.", 123 }, 124 "master_last_io_seconds_ago": { 125 RateType: metadata.Gauge, 126 Unit: metadata.Second, 127 Desc: "The number of seconds since the last interaction with master.", 128 }, 129 "master_sync_in_progress": { 130 RateType: metadata.Gauge, 131 Unit: metadata.InProgress, 132 Desc: "Master sync in progress indicates that the master is syncing to the slave.", 133 }, 134 "master_sync_left_bytes": { 135 RateType: metadata.Gauge, 136 Unit: metadata.Bytes, 137 Desc: "The number of bytes left before syncing is complete.", 138 }, 139 "master_sync_last_io_seconds_ago": { 140 RateType: metadata.Gauge, 141 Unit: metadata.Second, 142 Desc: "The number of seconds since last transfer I/O during a SYNC operation.", 143 }, 144 145 // Stats Section 146 "evicted_keys": { 147 RateType: metadata.Counter, 148 Unit: metadata.Key, 149 Desc: "The number of evicted keys due to maxmemory limit.", 150 }, 151 "expired_keys": { 152 RateType: metadata.Counter, 153 Unit: metadata.Key, 154 Desc: "The total total number of key expiration events.", 155 }, 156 "keyspace_hits": { 157 RateType: metadata.Counter, 158 Unit: metadata.CacheHit, 159 Desc: "The number of successful lookup of keys in the main dictionary.", 160 }, 161 "keyspace_misses": { 162 RateType: metadata.Counter, 163 Unit: metadata.CacheMiss, 164 Desc: "The number of failed lookup of keys in the main dictionary.", 165 }, 166 "used_cpu_sys": { 167 RateType: metadata.Counter, 168 Unit: metadata.Pct, 169 Desc: "The system CPU used by the main Redis process.", 170 }, 171 "used_cpu_user": { 172 RateType: metadata.Counter, 173 Unit: metadata.Pct, 174 Desc: "The user space CPU used by the main Redis process.", 175 }, 176 "uptime_in_seconds": { 177 RateType: metadata.Gauge, 178 Unit: metadata.Second, 179 Desc: "The number of seconds since Redis server start.", 180 }, 181 "total_connections_received": { 182 RateType: metadata.Counter, 183 Unit: metadata.Connection, 184 Desc: "The total number of connections accepted by the server.", 185 }, 186 "total_commands_processed": { 187 RateType: metadata.Counter, 188 Unit: metadata.Command, 189 Desc: "The total number of commands processed by the server.", 190 }, 191 "pubsub_channels": { 192 RateType: metadata.Gauge, 193 Unit: metadata.Channel, 194 Desc: "Global number of pub/sub channels with client subscriptions.", 195 }, 196 "pubsub_patterns": { 197 RateType: metadata.Gauge, 198 Unit: "Pattern", 199 Desc: "Global number of pub/sub channels with client subscriptions.", 200 }, 201 "rejected_connections": { 202 RateType: metadata.Counter, 203 Unit: metadata.Connection, 204 Desc: "The number of connections rejected because of maxclients limit.", 205 }, 206 "sync_full": { 207 RateType: metadata.Gauge, // Although the sync metrics are counters, it is not something by default you would want as a rate per second 208 Unit: metadata.Resync, 209 Desc: "The number of full resynchronizations with slaves.", 210 }, 211 "sync_partial_ok": { 212 RateType: metadata.Gauge, 213 Unit: metadata.Resync, 214 Desc: "The number of accepted PSYNC (partial resynchronization) requests.", 215 }, 216 "sync_partial_err": { 217 RateType: metadata.Gauge, 218 Unit: metadata.Resync, 219 Desc: "The number of unaccepted PSYNC (partial resynchronization) requests.", 220 }, 221 222 // Memory Section 223 "used_memory": { 224 RateType: metadata.Gauge, 225 Unit: metadata.Bytes, 226 Desc: "The total number of bytes allocated by Redis using its allocator (either standard libc, jemalloc, or an alternative allocator such as tcmalloc.", 227 }, 228 "used_memory_rss": { 229 RateType: metadata.Gauge, 230 Unit: metadata.Bytes, 231 Desc: "The number of bytes that Redis allocated as seen by the operating system (a.k.a resident set size). This is the number reported by tools such as top(1) and ps(1).", 232 }, 233 "mem_fragmentation_ratio": { 234 RateType: metadata.Gauge, 235 Unit: metadata.Ratio, 236 Desc: "The ratio between used_memory_rss and used_memory.", 237 }, 238 239 //Other 240 "role": {}, // This gets treated independtly to create the is_slave metric 241 } 242 243 // For master_link_status. 244 var redisMlsMap = map[string]string{ 245 "up": "1", 246 "down": "0", 247 } 248 249 // For aof_last_bgrewrite_status, rdb_last_bgsave_status. 250 func status(s string) string { 251 if s == "ok" { 252 return "1" 253 } 254 return "0" 255 } 256 257 // For role which translates to is_slave 258 func slave(s string) string { 259 if s == "slave" { 260 return "1" 261 } 262 return "0" 263 } 264 265 var ( 266 tcRE = regexp.MustCompile(`^\s*#\s*scollector.(\w+)\s*=\s*(.+)$`) 267 redisInstances []opentsdb.TagSet 268 ) 269 270 func redisScollectorTags(cfg string) map[string]string { 271 m := make(opentsdb.TagSet) 272 readLine(cfg, func(cfgline string) error { 273 result := tcRE.FindStringSubmatch(cfgline) 274 if len(result) == 3 { 275 m[result[1]] = result[2] 276 } 277 return nil 278 }) 279 return m 280 } 281 282 func redisInit() { 283 update := func() { 284 var instances []opentsdb.TagSet 285 oldRedis := false 286 add := func(port string) { 287 ri := make(opentsdb.TagSet) 288 ri["port"] = port 289 instances = append(instances, ri) 290 } 291 util.ReadCommand(func(line string) error { 292 sp := strings.Fields(line) 293 if len(sp) != 3 || !strings.HasSuffix(sp[1], "redis-server") { 294 return nil 295 } 296 if !strings.Contains(sp[2], ":") { 297 oldRedis = true 298 return nil 299 } 300 port := strings.Split(sp[2], ":")[1] 301 if port != "0" && InContainer(sp[0]) == false { 302 add(port) 303 } 304 return nil 305 }, "ps", "-e", "-o", "pid,args") 306 if oldRedis { 307 util.ReadCommand(func(line string) error { 308 if !strings.Contains(line, "redis-server") { 309 return nil 310 } 311 sp := strings.Fields(line) 312 if len(sp) < 7 || !strings.Contains(sp[3], ":") { 313 return nil 314 } 315 port := strings.Split(sp[3], ":")[1] 316 add(port) 317 return nil 318 }, "netstat", "-tnlp") 319 } 320 redisInstances = instances 321 } 322 update() 323 go func() { 324 for range time.Tick(time.Minute * 5) { 325 update() 326 } 327 }() 328 } 329 330 func redisKeyCount(line string) (int64, error) { 331 err := fmt.Errorf("Error parsing keyspace line from redis info: %v", line) 332 colSplit := strings.Split(line, ":") 333 if len(colSplit) < 2 { 334 return 0, err 335 } 336 comSplit := strings.Split(colSplit[1], ",") 337 if len(comSplit) != 3 { 338 return 0, err 339 } 340 eqSplit := strings.Split(comSplit[0], "=") 341 if len(eqSplit) != 2 || eqSplit[0] != "keys" { 342 return 0, err 343 } 344 v, err := strconv.ParseInt(eqSplit[1], 10, 64) 345 if err != nil { 346 return 0, err 347 } 348 return v, nil 349 } 350 351 func c_redis() (opentsdb.MultiDataPoint, error) { 352 var md opentsdb.MultiDataPoint 353 var Error error 354 for _, instance := range redisInstances { 355 c, err := redis.Dial("tcp", fmt.Sprintf(":%s", instance["port"])) 356 if err != nil { 357 Error = err 358 continue 359 } 360 defer c.Close() 361 info, err := c.Do("info", "all") 362 if err != nil { 363 Error = err 364 continue 365 } 366 tags := instance.Copy() 367 infoSplit := strings.Split(string(info.([]uint8)), "\n") 368 for _, line := range infoSplit { 369 line = strings.TrimSpace(line) 370 sp := strings.Split(line, ":") 371 if len(sp) < 2 || sp[0] != "config_file" { 372 continue 373 } 374 if sp[1] != "" { 375 m := redisScollectorTags(sp[1]) 376 tags.Merge(m) 377 break 378 } 379 } 380 var keyspace bool 381 var keys int64 382 for _, line := range infoSplit { 383 line = strings.TrimSpace(line) 384 if line == "" { 385 continue 386 } 387 if line == "# Keyspace" { 388 keyspace = true 389 continue 390 } 391 if keyspace { 392 k, err := redisKeyCount(line) 393 if err != nil { 394 return nil, err 395 } 396 keys += k 397 continue 398 } 399 sp := strings.Split(line, ":") 400 if len(sp) < 2 { 401 continue 402 } 403 m, foundMeta := redisMeta[sp[0]] 404 if !(foundMeta || strings.HasPrefix(sp[0], "cmdstat_")) { 405 continue 406 } 407 if sp[0] == "master_link_status" { 408 Add(&md, "redis."+sp[0], redisMlsMap[sp[1]], tags, m.RateType, m.Unit, m.Desc) 409 continue 410 } 411 if sp[0] == "role" { 412 Add(&md, "redis.is_slave", slave(sp[1]), tags, metadata.Gauge, metadata.Bool, descRedisIsSlave) 413 continue 414 } 415 if sp[0] == "aof_last_bgrewrite_status" || sp[0] == "rdb_last_bgsave_status" { 416 Add(&md, "redis."+sp[0], status(sp[1]), tags, m.RateType, m.Unit, m.Desc) 417 continue 418 } 419 if strings.HasPrefix(sp[0], "cmdstat_") { 420 cmdStats := strings.Split(sp[1], ",") 421 if len(cmdStats) < 3 { 422 continue 423 } 424 cmdStatsCalls := strings.Split(cmdStats[0], "=") 425 if len(cmdStatsCalls) < 2 { 426 continue 427 } 428 cmdStatsUsec := strings.Split(cmdStats[1], "=") 429 if len(cmdStatsUsec) < 2 { 430 continue 431 } 432 var cmdStatsMsec, cmdStatsMsecPc float64 433 microsec, err := strconv.ParseFloat(cmdStatsUsec[1], 64) 434 if err != nil { 435 continue 436 } 437 cmdStatsMsec = microsec / 1000 438 cmdStatsUsecPc := strings.Split(cmdStats[2], "=") 439 if len(cmdStatsUsecPc) < 2 { 440 continue 441 } 442 microsec, err = strconv.ParseFloat(cmdStatsUsecPc[1], 64) 443 if err != nil { 444 continue 445 } 446 cmdStatsMsecPc = microsec / 1000 447 if shortTag := strings.Split(sp[0], "_"); len(shortTag) == 2 { 448 tags["cmd"] = shortTag[1] 449 } 450 Add(&md, "redis.cmdstats_msec_pc", cmdStatsMsecPc, tags, metadata.Gauge, metadata.MilliSecond, descRedisCmdMsecPc) 451 Add(&md, "redis.cmdstats_msec", cmdStatsMsec, tags, metadata.Counter, metadata.MilliSecond, descRedisCmdMsec) 452 Add(&md, "redis.cmdstats_calls", cmdStatsCalls[1], tags, metadata.Counter, metadata.Operation, descRedisCmdCalls) 453 continue 454 } 455 Add(&md, "redis."+sp[0], sp[1], tags, m.RateType, m.Unit, m.Desc) 456 } 457 Add(&md, "redis.key_count", keys, tags, metadata.Gauge, metadata.Key, descRedisKeyCount) 458 } 459 return md, Error 460 } 461 462 const ( 463 descRedisKeyCount = "The total number of keys in the instance." 464 descRedisCmdMsecPc = "The average CPU consumed per command execution." 465 descRedisCmdMsec = "The total CPU time consumed by commands." 466 descRedisCmdCalls = "The total number of calls." 467 descRedisIsSlave = "This indicates if the redis instance is a slave or not." 468 )