github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/acceptancetests/repository/trusty/haproxy/hooks/charmhelpers/contrib/charmsupport/nrpe.py (about)

     1  # Copyright 2014-2015 Canonical Limited.
     2  #
     3  # This file is part of charm-helpers.
     4  #
     5  # charm-helpers is free software: you can redistribute it and/or modify
     6  # it under the terms of the GNU Lesser General Public License version 3 as
     7  # published by the Free Software Foundation.
     8  #
     9  # charm-helpers is distributed in the hope that it will be useful,
    10  # but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    12  # GNU Lesser General Public License for more details.
    13  #
    14  # You should have received a copy of the GNU Lesser General Public License
    15  # along with charm-helpers.  If not, see <http://www.gnu.org/licenses/>.
    16  
    17  """Compatibility with the nrpe-external-master charm"""
    18  # Copyright 2012 Canonical Ltd.
    19  #
    20  # Authors:
    21  #  Matthew Wedgwood <matthew.wedgwood@canonical.com>
    22  
    23  import subprocess
    24  import pwd
    25  import grp
    26  import os
    27  import glob
    28  import shutil
    29  import re
    30  import shlex
    31  import yaml
    32  
    33  from charmhelpers.core.hookenv import (
    34      config,
    35      local_unit,
    36      log,
    37      relation_ids,
    38      relation_set,
    39      relations_of_type,
    40  )
    41  
    42  from charmhelpers.core.host import service
    43  
    44  # This module adds compatibility with the nrpe-external-master and plain nrpe
    45  # subordinate charms. To use it in your charm:
    46  #
    47  # 1. Update metadata.yaml
    48  #
    49  #   provides:
    50  #     (...)
    51  #     nrpe-external-master:
    52  #       interface: nrpe-external-master
    53  #       scope: container
    54  #
    55  #   and/or
    56  #
    57  #   provides:
    58  #     (...)
    59  #     local-monitors:
    60  #       interface: local-monitors
    61  #       scope: container
    62  
    63  #
    64  # 2. Add the following to config.yaml
    65  #
    66  #    nagios_context:
    67  #      default: "juju"
    68  #      type: string
    69  #      description: |
    70  #        Used by the nrpe subordinate charms.
    71  #        A string that will be prepended to instance name to set the host name
    72  #        in nagios. So for instance the hostname would be something like:
    73  #            juju-myservice-0
    74  #        If you're running multiple environments with the same services in them
    75  #        this allows you to differentiate between them.
    76  #    nagios_servicegroups:
    77  #      default: ""
    78  #      type: string
    79  #      description: |
    80  #        A comma-separated list of nagios servicegroups.
    81  #        If left empty, the nagios_context will be used as the servicegroup
    82  #
    83  # 3. Add custom checks (Nagios plugins) to files/nrpe-external-master
    84  #
    85  # 4. Update your hooks.py with something like this:
    86  #
    87  #    from charmsupport.nrpe import NRPE
    88  #    (...)
    89  #    def update_nrpe_config():
    90  #        nrpe_compat = NRPE()
    91  #        nrpe_compat.add_check(
    92  #            shortname = "myservice",
    93  #            description = "Check MyService",
    94  #            check_cmd = "check_http -w 2 -c 10 http://localhost"
    95  #            )
    96  #        nrpe_compat.add_check(
    97  #            "myservice_other",
    98  #            "Check for widget failures",
    99  #            check_cmd = "/srv/myapp/scripts/widget_check"
   100  #            )
   101  #        nrpe_compat.write()
   102  #
   103  #    def config_changed():
   104  #        (...)
   105  #        update_nrpe_config()
   106  #
   107  #    def nrpe_external_master_relation_changed():
   108  #        update_nrpe_config()
   109  #
   110  #    def local_monitors_relation_changed():
   111  #        update_nrpe_config()
   112  #
   113  # 5. ln -s hooks.py nrpe-external-master-relation-changed
   114  #    ln -s hooks.py local-monitors-relation-changed
   115  
   116  
   117  class CheckException(Exception):
   118      pass
   119  
   120  
   121  class Check(object):
   122      shortname_re = '[A-Za-z0-9-_]+$'
   123      service_template = ("""
   124  #---------------------------------------------------
   125  # This file is Juju managed
   126  #---------------------------------------------------
   127  define service {{
   128      use                             active-service
   129      host_name                       {nagios_hostname}
   130      service_description             {nagios_hostname}[{shortname}] """
   131                          """{description}
   132      check_command                   check_nrpe!{command}
   133      servicegroups                   {nagios_servicegroup}
   134  }}
   135  """)
   136  
   137      def __init__(self, shortname, description, check_cmd):
   138          super(Check, self).__init__()
   139          # XXX: could be better to calculate this from the service name
   140          if not re.match(self.shortname_re, shortname):
   141              raise CheckException("shortname must match {}".format(
   142                  Check.shortname_re))
   143          self.shortname = shortname
   144          self.command = "check_{}".format(shortname)
   145          # Note: a set of invalid characters is defined by the
   146          # Nagios server config
   147          # The default is: illegal_object_name_chars=`~!$%^&*"|'<>?,()=
   148          self.description = description
   149          self.check_cmd = self._locate_cmd(check_cmd)
   150  
   151      def _locate_cmd(self, check_cmd):
   152          search_path = (
   153              '/usr/lib/nagios/plugins',
   154              '/usr/local/lib/nagios/plugins',
   155          )
   156          parts = shlex.split(check_cmd)
   157          for path in search_path:
   158              if os.path.exists(os.path.join(path, parts[0])):
   159                  command = os.path.join(path, parts[0])
   160                  if len(parts) > 1:
   161                      command += " " + " ".join(parts[1:])
   162                  return command
   163          log('Check command not found: {}'.format(parts[0]))
   164          return ''
   165  
   166      def write(self, nagios_context, hostname, nagios_servicegroups):
   167          nrpe_check_file = '/etc/nagios/nrpe.d/{}.cfg'.format(
   168              self.command)
   169          with open(nrpe_check_file, 'w') as nrpe_check_config:
   170              nrpe_check_config.write("# check {}\n".format(self.shortname))
   171              nrpe_check_config.write("command[{}]={}\n".format(
   172                  self.command, self.check_cmd))
   173  
   174          if not os.path.exists(NRPE.nagios_exportdir):
   175              log('Not writing service config as {} is not accessible'.format(
   176                  NRPE.nagios_exportdir))
   177          else:
   178              self.write_service_config(nagios_context, hostname,
   179                                        nagios_servicegroups)
   180  
   181      def write_service_config(self, nagios_context, hostname,
   182                               nagios_servicegroups):
   183          for f in os.listdir(NRPE.nagios_exportdir):
   184              if re.search('.*{}.cfg'.format(self.command), f):
   185                  os.remove(os.path.join(NRPE.nagios_exportdir, f))
   186  
   187          templ_vars = {
   188              'nagios_hostname': hostname,
   189              'nagios_servicegroup': nagios_servicegroups,
   190              'description': self.description,
   191              'shortname': self.shortname,
   192              'command': self.command,
   193          }
   194          nrpe_service_text = Check.service_template.format(**templ_vars)
   195          nrpe_service_file = '{}/service__{}_{}.cfg'.format(
   196              NRPE.nagios_exportdir, hostname, self.command)
   197          with open(nrpe_service_file, 'w') as nrpe_service_config:
   198              nrpe_service_config.write(str(nrpe_service_text))
   199  
   200      def run(self):
   201          subprocess.call(self.check_cmd)
   202  
   203  
   204  class NRPE(object):
   205      nagios_logdir = '/var/log/nagios'
   206      nagios_exportdir = '/var/lib/nagios/export'
   207      nrpe_confdir = '/etc/nagios/nrpe.d'
   208  
   209      def __init__(self, hostname=None):
   210          super(NRPE, self).__init__()
   211          self.config = config()
   212          self.nagios_context = self.config['nagios_context']
   213          if 'nagios_servicegroups' in self.config and self.config['nagios_servicegroups']:
   214              self.nagios_servicegroups = self.config['nagios_servicegroups']
   215          else:
   216              self.nagios_servicegroups = self.nagios_context
   217          self.unit_name = local_unit().replace('/', '-')
   218          if hostname:
   219              self.hostname = hostname
   220          else:
   221              self.hostname = "{}-{}".format(self.nagios_context, self.unit_name)
   222          self.checks = []
   223  
   224      def add_check(self, *args, **kwargs):
   225          self.checks.append(Check(*args, **kwargs))
   226  
   227      def write(self):
   228          try:
   229              nagios_uid = pwd.getpwnam('nagios').pw_uid
   230              nagios_gid = grp.getgrnam('nagios').gr_gid
   231          except:
   232              log("Nagios user not set up, nrpe checks not updated")
   233              return
   234  
   235          if not os.path.exists(NRPE.nagios_logdir):
   236              os.mkdir(NRPE.nagios_logdir)
   237              os.chown(NRPE.nagios_logdir, nagios_uid, nagios_gid)
   238  
   239          nrpe_monitors = {}
   240          monitors = {"monitors": {"remote": {"nrpe": nrpe_monitors}}}
   241          for nrpecheck in self.checks:
   242              nrpecheck.write(self.nagios_context, self.hostname,
   243                              self.nagios_servicegroups)
   244              nrpe_monitors[nrpecheck.shortname] = {
   245                  "command": nrpecheck.command,
   246              }
   247  
   248          service('restart', 'nagios-nrpe-server')
   249  
   250          monitor_ids = relation_ids("local-monitors") + \
   251              relation_ids("nrpe-external-master")
   252          for rid in monitor_ids:
   253              relation_set(relation_id=rid, monitors=yaml.dump(monitors))
   254  
   255  
   256  def get_nagios_hostcontext(relation_name='nrpe-external-master'):
   257      """
   258      Query relation with nrpe subordinate, return the nagios_host_context
   259  
   260      :param str relation_name: Name of relation nrpe sub joined to
   261      """
   262      for rel in relations_of_type(relation_name):
   263          if 'nagios_hostname' in rel:
   264              return rel['nagios_host_context']
   265  
   266  
   267  def get_nagios_hostname(relation_name='nrpe-external-master'):
   268      """
   269      Query relation with nrpe subordinate, return the nagios_hostname
   270  
   271      :param str relation_name: Name of relation nrpe sub joined to
   272      """
   273      for rel in relations_of_type(relation_name):
   274          if 'nagios_hostname' in rel:
   275              return rel['nagios_hostname']
   276  
   277  
   278  def get_nagios_unit_name(relation_name='nrpe-external-master'):
   279      """
   280      Return the nagios unit name prepended with host_context if needed
   281  
   282      :param str relation_name: Name of relation nrpe sub joined to
   283      """
   284      host_context = get_nagios_hostcontext(relation_name)
   285      if host_context:
   286          unit = "%s:%s" % (host_context, local_unit())
   287      else:
   288          unit = local_unit()
   289      return unit
   290  
   291  
   292  def add_init_service_checks(nrpe, services, unit_name):
   293      """
   294      Add checks for each service in list
   295  
   296      :param NRPE nrpe: NRPE object to add check to
   297      :param list services: List of services to check
   298      :param str unit_name: Unit name to use in check description
   299      """
   300      for svc in services:
   301          upstart_init = '/etc/init/%s.conf' % svc
   302          sysv_init = '/etc/init.d/%s' % svc
   303          if os.path.exists(upstart_init):
   304              nrpe.add_check(
   305                  shortname=svc,
   306                  description='process check {%s}' % unit_name,
   307                  check_cmd='check_upstart_job %s' % svc
   308              )
   309          elif os.path.exists(sysv_init):
   310              cronpath = '/etc/cron.d/nagios-service-check-%s' % svc
   311              cron_file = ('*/5 * * * * root '
   312                           '/usr/local/lib/nagios/plugins/check_exit_status.pl '
   313                           '-s /etc/init.d/%s status > '
   314                           '/var/lib/nagios/service-check-%s.txt\n' % (svc,
   315                                                                       svc)
   316                           )
   317              f = open(cronpath, 'w')
   318              f.write(cron_file)
   319              f.close()
   320              nrpe.add_check(
   321                  shortname=svc,
   322                  description='process check {%s}' % unit_name,
   323                  check_cmd='check_status_file.py -f '
   324                            '/var/lib/nagios/service-check-%s.txt' % svc,
   325              )
   326  
   327  
   328  def copy_nrpe_checks():
   329      """
   330      Copy the nrpe checks into place
   331  
   332      """
   333      NAGIOS_PLUGINS = '/usr/local/lib/nagios/plugins'
   334      nrpe_files_dir = os.path.join(os.getenv('CHARM_DIR'), 'hooks',
   335                                    'charmhelpers', 'contrib', 'openstack',
   336                                    'files')
   337  
   338      if not os.path.exists(NAGIOS_PLUGINS):
   339          os.makedirs(NAGIOS_PLUGINS)
   340      for fname in glob.glob(os.path.join(nrpe_files_dir, "check_*")):
   341          if os.path.isfile(fname):
   342              shutil.copy2(fname,
   343                           os.path.join(NAGIOS_PLUGINS, os.path.basename(fname)))
   344  
   345  
   346  def add_haproxy_checks(nrpe, unit_name):
   347      """
   348      Add checks for each service in list
   349  
   350      :param NRPE nrpe: NRPE object to add check to
   351      :param str unit_name: Unit name to use in check description
   352      """
   353      nrpe.add_check(
   354          shortname='haproxy_servers',
   355          description='Check HAProxy {%s}' % unit_name,
   356          check_cmd='check_haproxy.sh')
   357      nrpe.add_check(
   358          shortname='haproxy_queue',
   359          description='Check HAProxy queue depth {%s}' % unit_name,
   360          check_cmd='check_haproxy_queue_depth.sh')