github.com/yrj2011/jx-test-infra@v0.0.0-20190529031832-7a2065ee98eb/kettle/monitor.py (about)

     1  #!/usr/bin/env python
     2  # Copyright 2018 The Kubernetes Authors.
     3  #
     4  # Licensed under the Apache License, Version 2.0 (the "License");
     5  # you may not use this file except in compliance with the License.
     6  # You may obtain a copy of the License at
     7  #
     8  #     http://www.apache.org/licenses/LICENSE-2.0
     9  #
    10  # Unless required by applicable law or agreed to in writing, software
    11  # distributed under the License is distributed on an "AS IS" BASIS,
    12  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  # See the License for the specific language governing permissions and
    14  # limitations under the License.
    15  
    16  """
    17  A dead-simple Influxdb data pusher to report BigQuery database statistics.
    18  """
    19  
    20  
    21  import argparse
    22  import json
    23  import os
    24  import sys
    25  import time
    26  
    27  import influxdb
    28  
    29  try:
    30      from google.cloud import bigquery
    31      import google.cloud.exceptions
    32  except ImportError:
    33      print 'WARNING: unable to load google cloud (test environment?)'
    34      import traceback
    35      traceback.print_exc()
    36  
    37  
    38  def collect(tables, stale_hours, influx_client):
    39      lines = []
    40      stale = False
    41      for table_spec in tables:
    42          project, dataset_name = table_spec.split(':')
    43          dataset, name = dataset_name.split('.')
    44  
    45          table = bigquery.Client(project).dataset(dataset).table(name)
    46          try:
    47              table.reload()
    48          except google.cloud.exceptions.NotFound:  # pylint: disable=no-member
    49              continue
    50  
    51          # converting datetimes back into epoch-milliseconds is tiresome
    52          # pylint: disable=protected-access
    53          fields = {
    54              'size_bytes': table.num_bytes,
    55              'modified_time': int(table._properties.get('lastModifiedTime')),
    56              'row_count': table.num_rows
    57          }
    58          sbuf = table._properties.get('streamingBuffer')
    59          if sbuf:
    60              fields.update({
    61                  'streaming_buffer_estimated_bytes': sbuf['estimatedBytes'],
    62                  'streaming_buffer_estimated_row_count': sbuf['estimatedRows'],
    63                  'streaming_buffer_oldest_entry_time': int(sbuf['oldestEntryTime']),
    64              })
    65  
    66          hours_old = (time.time() - fields['modified_time'] / 1000) / (3600.0)
    67          if stale_hours and hours_old > stale_hours:
    68              print 'ERROR: table %s is %.1f hours old. Max allowed: %s hours.' % (
    69                  table.table_id, hours_old, stale_hours)
    70              stale = True
    71  
    72          lines.append(influxdb.line_protocol.make_lines({
    73              'tags': {'db': table.table_id},
    74              'points': [{'measurement': 'bigquery', 'fields': fields}]
    75          }))
    76  
    77      print 'Collected data:'
    78      print ''.join(lines)
    79  
    80      if influx_client:
    81          influx_client.write_points(lines, time_precision='ms', protocol='line')
    82      else:
    83          print 'Not uploading to influxdb; missing client.'
    84  
    85      return int(stale)
    86  
    87  
    88  def make_influx_client():
    89      """Make an InfluxDB client from config at path $VELODROME_INFLUXDB_CONFIG"""
    90      if 'VELODROME_INFLUXDB_CONFIG' not in os.environ:
    91          return None
    92  
    93      with open(os.environ['VELODROME_INFLUXDB_CONFIG']) as config_file:
    94          config = json.load(config_file)
    95  
    96      return influxdb.InfluxDBClient(
    97          host=config['host'],
    98          port=config['port'],
    99          username=config['user'],
   100          password=config['password'],
   101          database='metrics',
   102      )
   103  
   104  
   105  def main(args):
   106      parser = argparse.ArgumentParser()
   107      parser.add_argument('--table', nargs='+', required=True,
   108                          help='List of datasets to return information about.')
   109      parser.add_argument('--stale', type=int,
   110                          help='Number of hours to consider stale.')
   111      opts = parser.parse_args(args)
   112      return collect(opts.table, opts.stale, make_influx_client())
   113  
   114  
   115  if __name__ == '__main__':
   116      sys.exit(main(sys.argv[1:]))