#!/usr/bin/env python
# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
A dead-simple Influxdb data pusher to report BigQuery database statistics.
"""


import argparse
import json
import os
import sys
import time

import influxdb

try:
    from google.cloud import bigquery
    import google.cloud.exceptions
except ImportError:
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('WARNING: unable to load google cloud (test environment?)')
    import traceback
    traceback.print_exc()


def _parse_table_spec(table_spec):
    """Split a 'project:dataset.table' spec into (project, dataset, table).

    The split on ':' is taken from the right so that a project ID which
    itself contains a colon (e.g. 'example.com:proj') stays intact.

    Raises:
        ValueError: if the spec lacks the ':' separator or does not have
            exactly one '.' between dataset and table.
    """
    project, dataset_name = table_spec.rsplit(':', 1)
    dataset, name = dataset_name.split('.')
    return project, dataset, name


def collect(tables, stale_hours, influx_client):
    """Gather statistics for each table and push them to InfluxDB.

    Args:
        tables: iterable of table specs formatted 'project:dataset.table'.
        stale_hours: maximum allowed age, in hours, of a table's last
            modification; a falsy value disables the staleness check.
        influx_client: object with a write_points() method, or a falsy
            value to only print the collected data.

    Returns:
        1 if any table was older than stale_hours, otherwise 0 (usable
        directly as a process exit status).
    """
    lines = []
    stale = False
    clients = {}  # one BigQuery client per project instead of one per table
    for table_spec in tables:
        project, dataset, name = _parse_table_spec(table_spec)

        if project not in clients:
            clients[project] = bigquery.Client(project)
        table = clients[project].dataset(dataset).table(name)
        try:
            table.reload()
        except google.cloud.exceptions.NotFound:  # pylint: disable=no-member
            # Still skipped, but no longer silently: a missing table would
            # otherwise be indistinguishable from a healthy one.
            print('WARNING: table %s not found; skipping.' % table_spec)
            continue

        # converting datetimes back into epoch-milliseconds is tiresome
        # pylint: disable=protected-access
        fields = {
            'size_bytes': table.num_bytes,
            'modified_time': int(table._properties.get('lastModifiedTime')),
            'row_count': table.num_rows
        }
        sbuf = table._properties.get('streamingBuffer')
        if sbuf:
            fields.update({
                'streaming_buffer_estimated_bytes': sbuf['estimatedBytes'],
                'streaming_buffer_estimated_row_count': sbuf['estimatedRows'],
                'streaming_buffer_oldest_entry_time': int(sbuf['oldestEntryTime']),
            })

        # modified_time is in milliseconds; divide by a float so the result
        # is not truncated to whole seconds by Python 2 integer division.
        hours_old = (time.time() - fields['modified_time'] / 1000.0) / 3600.0
        if stale_hours and hours_old > stale_hours:
            print('ERROR: table %s is %.1f hours old. Max allowed: %s hours.' % (
                table.table_id, hours_old, stale_hours))
            stale = True

        lines.append(influxdb.line_protocol.make_lines({
            'tags': {'db': table.table_id},
            'points': [{'measurement': 'bigquery', 'fields': fields}]
        }))

    print('Collected data:')
    print(''.join(lines))

    if influx_client:
        influx_client.write_points(lines, time_precision='ms', protocol='line')
    else:
        print('Not uploading to influxdb; missing client.')

    return int(stale)


def make_influx_client():
    """Make an InfluxDB client from config at path $VELODROME_INFLUXDB_CONFIG

    The config file is JSON and must provide the keys 'host', 'port',
    'user' and 'password'. The client always targets the 'metrics' database.

    Returns:
        An influxdb.InfluxDBClient, or None when the environment variable
        is not set.
    """
    if 'VELODROME_INFLUXDB_CONFIG' not in os.environ:
        return None

    with open(os.environ['VELODROME_INFLUXDB_CONFIG']) as config_file:
        config = json.load(config_file)

    return influxdb.InfluxDBClient(
        host=config['host'],
        port=config['port'],
        username=config['user'],
        password=config['password'],
        database='metrics',
    )


def main(args):
    """Parse command-line arguments and run the collection.

    Args:
        args: list of argument strings, excluding the program name.

    Returns:
        The value returned by collect(), used as the exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--table', nargs='+', required=True,
                        help='List of datasets to return information about.')
    parser.add_argument('--stale', type=int,
                        help='Number of hours to consider stale.')
    opts = parser.parse_args(args)
    return collect(opts.table, opts.stale, make_influx_client())


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))