bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/redis_linux.go (about)

     1  package collectors
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"strconv"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/garyburd/redigo/redis"
    11  
    12  	"bosun.org/metadata"
    13  	"bosun.org/opentsdb"
    14  	"bosun.org/util"
    15  )
    16  
    17  func init() {
    18  	collectors = append(collectors, &IntervalCollector{F: c_redis, init: redisInit})
    19  }
    20  
    21  var redisMeta = map[string]MetricMeta{ // http://redis.io/commands/info)
    22  	// Persistence Section
    23  	//   AOF
    24  	"aof_enabled": {
    25  		RateType: metadata.Gauge,
    26  		Unit:     metadata.Enabled,
    27  		Desc:     "AOF Enabled indicates that Append Only File logging is activated.",
    28  	},
    29  	"aof_current_size": {
    30  		RateType: metadata.Gauge,
    31  		Unit:     metadata.Bytes,
    32  		Desc:     "The current file size of the AOF (Append Only File).",
    33  	},
    34  	"aof_rewrite_in_progress": {
    35  		RateType: metadata.Gauge,
    36  		Unit:     metadata.InProgress,
    37  		Desc:     "Rewrite in progress indicates that AOF (Append Only File) logging is activated.",
    38  	},
    39  	"aof_rewrite_scheduled": {
    40  		RateType: metadata.Gauge,
    41  		Unit:     metadata.Scheduled,
    42  		Desc:     "AOF rewrite scheduled means an Append Only file rewrite operation will be scheduled once the on-going RDB save is complete.",
    43  	},
    44  	"aof_last_rewrite_time_sec": {
    45  		RateType: metadata.Gauge,
    46  		Unit:     metadata.Second,
    47  		Desc:     "The duration of the last AOF (Append Only file) rewrite operation in seconds.",
    48  	},
    49  	"aof_current_rewrite_time_sec": {
    50  		RateType: metadata.Gauge,
    51  		Unit:     metadata.Second,
    52  		Desc:     "The duration of the ongoing AOF (Append Only file) rewrite operation in seconds -- if there is one.",
    53  	},
    54  	"aof_last_bgrewrite_status": {
    55  		RateType: metadata.Gauge,
    56  		Unit:     metadata.Bool,
    57  		Desc:     "The status of the last AOF (Append Only File) rewrite opperation.",
    58  	},
    59  	//   RDB
    60  	"rdb_bgsave_in_progress": {
    61  		RateType: metadata.Gauge,
    62  		Unit:     metadata.InProgress,
    63  		Desc:     "BGSAVE in progress indicates if a RDB save is on-going.",
    64  	},
    65  	"rdb_changes_since_last_save": {
    66  		RateType: metadata.Gauge,
    67  		Unit:     metadata.Change,
    68  		Desc:     "The number of operations that produced some kind of changes in the dataset since the last time either SAVE or BGSAVE was called.",
    69  	},
    70  	"rdb_last_bgsave_status": {
    71  		RateType: metadata.Gauge,
    72  		Unit:     metadata.Bool,
    73  		Desc:     "The Status of the last RDB save operation.",
    74  	},
    75  	"rdb_last_bgsave_time_sec": {
    76  		RateType: metadata.Gauge,
    77  		Unit:     metadata.Second,
    78  		Desc:     "The duration of the last RDB save operation.",
    79  	},
    80  	"rdb_current_bgsave_time_sec": {
    81  		RateType: metadata.Gauge,
    82  		Unit:     metadata.Second,
    83  		Desc:     "The duration of the ongoing RDB save operation -- if there is one.",
    84  	},
    85  	"rdb_last_save_time": {
    86  		RateType: metadata.Gauge,
    87  		Unit:     metadata.Timestamp,
    88  		Desc:     "The epoch-based timestamp of last successful RDB save.",
    89  	},
    90  
    91  	// Clients Section
    92  	"blocked_clients": {
    93  		RateType: metadata.Gauge,
    94  		Unit:     metadata.Client,
    95  		Desc:     "The number of clients pending on a blocking call (BLPOP, BRPOP, BRPOPLPUSH).",
    96  	},
    97  	"connected_clients": {
    98  		RateType: metadata.Gauge,
    99  		Unit:     metadata.Connection,
   100  		Desc:     "The number of client connections (excluding connections from slaves).",
   101  	},
   102  	"client_biggest_input_buf": {
   103  		RateType: metadata.Gauge,
   104  		Unit:     metadata.Count, // Need to figure out what this is, bytes?
   105  		Desc:     "The biggest input buffer among current client connections.",
   106  	},
   107  	"client_longest_output_list": {
   108  		RateType: metadata.Gauge,
   109  		Unit:     metadata.Count, // Need to figure out what this is, length?
   110  		Desc:     "The longest output list among current client connections.",
   111  	},
   112  
   113  	// Replication Sections
   114  	"connected_slaves": {
   115  		RateType: metadata.Gauge,
   116  		Unit:     metadata.Slave,
   117  		Desc:     "The number of connected slaves.",
   118  	},
   119  	"master_link_status": {
   120  		RateType: metadata.Gauge,
   121  		Unit:     metadata.Ok,
   122  		Desc:     "The up/down status of the link to the master.",
   123  	},
   124  	"master_last_io_seconds_ago": {
   125  		RateType: metadata.Gauge,
   126  		Unit:     metadata.Second,
   127  		Desc:     "The number of seconds since the last interaction with master.",
   128  	},
   129  	"master_sync_in_progress": {
   130  		RateType: metadata.Gauge,
   131  		Unit:     metadata.InProgress,
   132  		Desc:     "Master sync in progress indicates that the master is syncing to the slave.",
   133  	},
   134  	"master_sync_left_bytes": {
   135  		RateType: metadata.Gauge,
   136  		Unit:     metadata.Bytes,
   137  		Desc:     "The number of bytes left before syncing is complete.",
   138  	},
   139  	"master_sync_last_io_seconds_ago": {
   140  		RateType: metadata.Gauge,
   141  		Unit:     metadata.Second,
   142  		Desc:     "The number of seconds since last transfer I/O during a SYNC operation.",
   143  	},
   144  
   145  	// Stats Section
   146  	"evicted_keys": {
   147  		RateType: metadata.Counter,
   148  		Unit:     metadata.Key,
   149  		Desc:     "The number of evicted keys due to maxmemory limit.",
   150  	},
   151  	"expired_keys": {
   152  		RateType: metadata.Counter,
   153  		Unit:     metadata.Key,
   154  		Desc:     "The total total number of key expiration events.",
   155  	},
   156  	"keyspace_hits": {
   157  		RateType: metadata.Counter,
   158  		Unit:     metadata.CacheHit,
   159  		Desc:     "The number of successful lookup of keys in the main dictionary.",
   160  	},
   161  	"keyspace_misses": {
   162  		RateType: metadata.Counter,
   163  		Unit:     metadata.CacheMiss,
   164  		Desc:     "The number of failed lookup of keys in the main dictionary.",
   165  	},
   166  	"used_cpu_sys": {
   167  		RateType: metadata.Counter,
   168  		Unit:     metadata.Pct,
   169  		Desc:     "The system CPU used by the main Redis process.",
   170  	},
   171  	"used_cpu_user": {
   172  		RateType: metadata.Counter,
   173  		Unit:     metadata.Pct,
   174  		Desc:     "The user space CPU used by the main Redis process.",
   175  	},
   176  	"uptime_in_seconds": {
   177  		RateType: metadata.Gauge,
   178  		Unit:     metadata.Second,
   179  		Desc:     "The number of seconds since Redis server start.",
   180  	},
   181  	"total_connections_received": {
   182  		RateType: metadata.Counter,
   183  		Unit:     metadata.Connection,
   184  		Desc:     "The total number of connections accepted by the server.",
   185  	},
   186  	"total_commands_processed": {
   187  		RateType: metadata.Counter,
   188  		Unit:     metadata.Command,
   189  		Desc:     "The total number of commands processed by the server.",
   190  	},
   191  	"pubsub_channels": {
   192  		RateType: metadata.Gauge,
   193  		Unit:     metadata.Channel,
   194  		Desc:     "Global number of pub/sub channels with client subscriptions.",
   195  	},
   196  	"pubsub_patterns": {
   197  		RateType: metadata.Gauge,
   198  		Unit:     "Pattern",
   199  		Desc:     "Global number of pub/sub channels with client subscriptions.",
   200  	},
   201  	"rejected_connections": {
   202  		RateType: metadata.Counter,
   203  		Unit:     metadata.Connection,
   204  		Desc:     "The number of connections rejected because of maxclients limit.",
   205  	},
   206  	"sync_full": {
   207  		RateType: metadata.Gauge, // Although the sync metrics are counters, it is not something by default you would want as a rate per second
   208  		Unit:     metadata.Resync,
   209  		Desc:     "The number of full resynchronizations with slaves.",
   210  	},
   211  	"sync_partial_ok": {
   212  		RateType: metadata.Gauge,
   213  		Unit:     metadata.Resync,
   214  		Desc:     "The number of accepted PSYNC (partial resynchronization) requests.",
   215  	},
   216  	"sync_partial_err": {
   217  		RateType: metadata.Gauge,
   218  		Unit:     metadata.Resync,
   219  		Desc:     "The number of unaccepted PSYNC (partial resynchronization) requests.",
   220  	},
   221  
   222  	// Memory Section
   223  	"used_memory": {
   224  		RateType: metadata.Gauge,
   225  		Unit:     metadata.Bytes,
   226  		Desc:     "The total number of bytes allocated by Redis using its allocator (either standard libc, jemalloc, or an alternative allocator such as tcmalloc.",
   227  	},
   228  	"used_memory_rss": {
   229  		RateType: metadata.Gauge,
   230  		Unit:     metadata.Bytes,
   231  		Desc:     "The number of bytes that Redis allocated as seen by the operating system (a.k.a resident set size). This is the number reported by tools such as top(1) and ps(1).",
   232  	},
   233  	"mem_fragmentation_ratio": {
   234  		RateType: metadata.Gauge,
   235  		Unit:     metadata.Ratio,
   236  		Desc:     "The ratio between used_memory_rss and used_memory.",
   237  	},
   238  
   239  	//Other
   240  	"role": {}, // This gets treated independtly to create the is_slave metric
   241  }
   242  
   243  // For master_link_status.
   244  var redisMlsMap = map[string]string{
   245  	"up":   "1",
   246  	"down": "0",
   247  }
   248  
   249  // For aof_last_bgrewrite_status, rdb_last_bgsave_status.
   250  func status(s string) string {
   251  	if s == "ok" {
   252  		return "1"
   253  	}
   254  	return "0"
   255  }
   256  
   257  // For role which translates to is_slave
   258  func slave(s string) string {
   259  	if s == "slave" {
   260  		return "1"
   261  	}
   262  	return "0"
   263  }
   264  
   265  var (
   266  	tcRE           = regexp.MustCompile(`^\s*#\s*scollector.(\w+)\s*=\s*(.+)$`)
   267  	redisInstances []opentsdb.TagSet
   268  )
   269  
   270  func redisScollectorTags(cfg string) map[string]string {
   271  	m := make(opentsdb.TagSet)
   272  	readLine(cfg, func(cfgline string) error {
   273  		result := tcRE.FindStringSubmatch(cfgline)
   274  		if len(result) == 3 {
   275  			m[result[1]] = result[2]
   276  		}
   277  		return nil
   278  	})
   279  	return m
   280  }
   281  
   282  func redisInit() {
   283  	update := func() {
   284  		var instances []opentsdb.TagSet
   285  		oldRedis := false
   286  		add := func(port string) {
   287  			ri := make(opentsdb.TagSet)
   288  			ri["port"] = port
   289  			instances = append(instances, ri)
   290  		}
   291  		util.ReadCommand(func(line string) error {
   292  			sp := strings.Fields(line)
   293  			if len(sp) != 3 || !strings.HasSuffix(sp[1], "redis-server") {
   294  				return nil
   295  			}
   296  			if !strings.Contains(sp[2], ":") {
   297  				oldRedis = true
   298  				return nil
   299  			}
   300  			port := strings.Split(sp[2], ":")[1]
   301  			if port != "0" && InContainer(sp[0]) == false {
   302  				add(port)
   303  			}
   304  			return nil
   305  		}, "ps", "-e", "-o", "pid,args")
   306  		if oldRedis {
   307  			util.ReadCommand(func(line string) error {
   308  				if !strings.Contains(line, "redis-server") {
   309  					return nil
   310  				}
   311  				sp := strings.Fields(line)
   312  				if len(sp) < 7 || !strings.Contains(sp[3], ":") {
   313  					return nil
   314  				}
   315  				port := strings.Split(sp[3], ":")[1]
   316  				add(port)
   317  				return nil
   318  			}, "netstat", "-tnlp")
   319  		}
   320  		redisInstances = instances
   321  	}
   322  	update()
   323  	go func() {
   324  		for range time.Tick(time.Minute * 5) {
   325  			update()
   326  		}
   327  	}()
   328  }
   329  
   330  func redisKeyCount(line string) (int64, error) {
   331  	err := fmt.Errorf("Error parsing keyspace line from redis info: %v", line)
   332  	colSplit := strings.Split(line, ":")
   333  	if len(colSplit) < 2 {
   334  		return 0, err
   335  	}
   336  	comSplit := strings.Split(colSplit[1], ",")
   337  	if len(comSplit) != 3 {
   338  		return 0, err
   339  	}
   340  	eqSplit := strings.Split(comSplit[0], "=")
   341  	if len(eqSplit) != 2 || eqSplit[0] != "keys" {
   342  		return 0, err
   343  	}
   344  	v, err := strconv.ParseInt(eqSplit[1], 10, 64)
   345  	if err != nil {
   346  		return 0, err
   347  	}
   348  	return v, nil
   349  }
   350  
   351  func c_redis() (opentsdb.MultiDataPoint, error) {
   352  	var md opentsdb.MultiDataPoint
   353  	var Error error
   354  	for _, instance := range redisInstances {
   355  		c, err := redis.Dial("tcp", fmt.Sprintf(":%s", instance["port"]))
   356  		if err != nil {
   357  			Error = err
   358  			continue
   359  		}
   360  		defer c.Close()
   361  		info, err := c.Do("info", "all")
   362  		if err != nil {
   363  			Error = err
   364  			continue
   365  		}
   366  		tags := instance.Copy()
   367  		infoSplit := strings.Split(string(info.([]uint8)), "\n")
   368  		for _, line := range infoSplit {
   369  			line = strings.TrimSpace(line)
   370  			sp := strings.Split(line, ":")
   371  			if len(sp) < 2 || sp[0] != "config_file" {
   372  				continue
   373  			}
   374  			if sp[1] != "" {
   375  				m := redisScollectorTags(sp[1])
   376  				tags.Merge(m)
   377  				break
   378  			}
   379  		}
   380  		var keyspace bool
   381  		var keys int64
   382  		for _, line := range infoSplit {
   383  			line = strings.TrimSpace(line)
   384  			if line == "" {
   385  				continue
   386  			}
   387  			if line == "# Keyspace" {
   388  				keyspace = true
   389  				continue
   390  			}
   391  			if keyspace {
   392  				k, err := redisKeyCount(line)
   393  				if err != nil {
   394  					return nil, err
   395  				}
   396  				keys += k
   397  				continue
   398  			}
   399  			sp := strings.Split(line, ":")
   400  			if len(sp) < 2 {
   401  				continue
   402  			}
   403  			m, foundMeta := redisMeta[sp[0]]
   404  			if !(foundMeta || strings.HasPrefix(sp[0], "cmdstat_")) {
   405  				continue
   406  			}
   407  			if sp[0] == "master_link_status" {
   408  				Add(&md, "redis."+sp[0], redisMlsMap[sp[1]], tags, m.RateType, m.Unit, m.Desc)
   409  				continue
   410  			}
   411  			if sp[0] == "role" {
   412  				Add(&md, "redis.is_slave", slave(sp[1]), tags, metadata.Gauge, metadata.Bool, descRedisIsSlave)
   413  				continue
   414  			}
   415  			if sp[0] == "aof_last_bgrewrite_status" || sp[0] == "rdb_last_bgsave_status" {
   416  				Add(&md, "redis."+sp[0], status(sp[1]), tags, m.RateType, m.Unit, m.Desc)
   417  				continue
   418  			}
   419  			if strings.HasPrefix(sp[0], "cmdstat_") {
   420  				cmdStats := strings.Split(sp[1], ",")
   421  				if len(cmdStats) < 3 {
   422  					continue
   423  				}
   424  				cmdStatsCalls := strings.Split(cmdStats[0], "=")
   425  				if len(cmdStatsCalls) < 2 {
   426  					continue
   427  				}
   428  				cmdStatsUsec := strings.Split(cmdStats[1], "=")
   429  				if len(cmdStatsUsec) < 2 {
   430  					continue
   431  				}
   432  				var cmdStatsMsec, cmdStatsMsecPc float64
   433  				microsec, err := strconv.ParseFloat(cmdStatsUsec[1], 64)
   434  				if err != nil {
   435  					continue
   436  				}
   437  				cmdStatsMsec = microsec / 1000
   438  				cmdStatsUsecPc := strings.Split(cmdStats[2], "=")
   439  				if len(cmdStatsUsecPc) < 2 {
   440  					continue
   441  				}
   442  				microsec, err = strconv.ParseFloat(cmdStatsUsecPc[1], 64)
   443  				if err != nil {
   444  					continue
   445  				}
   446  				cmdStatsMsecPc = microsec / 1000
   447  				if shortTag := strings.Split(sp[0], "_"); len(shortTag) == 2 {
   448  					tags["cmd"] = shortTag[1]
   449  				}
   450  				Add(&md, "redis.cmdstats_msec_pc", cmdStatsMsecPc, tags, metadata.Gauge, metadata.MilliSecond, descRedisCmdMsecPc)
   451  				Add(&md, "redis.cmdstats_msec", cmdStatsMsec, tags, metadata.Counter, metadata.MilliSecond, descRedisCmdMsec)
   452  				Add(&md, "redis.cmdstats_calls", cmdStatsCalls[1], tags, metadata.Counter, metadata.Operation, descRedisCmdCalls)
   453  				continue
   454  			}
   455  			Add(&md, "redis."+sp[0], sp[1], tags, m.RateType, m.Unit, m.Desc)
   456  		}
   457  		Add(&md, "redis.key_count", keys, tags, metadata.Gauge, metadata.Key, descRedisKeyCount)
   458  	}
   459  	return md, Error
   460  }
   461  
   462  const (
   463  	descRedisKeyCount  = "The total number of keys in the instance."
   464  	descRedisCmdMsecPc = "The average CPU consumed per command execution."
   465  	descRedisCmdMsec   = "The total CPU time consumed by commands."
   466  	descRedisCmdCalls  = "The total number of calls."
   467  	descRedisIsSlave   = "This indicates if the redis instance is a slave or not."
   468  )