#!/usr/bin/env python
# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Reduce a build log to its interesting lines and render them as HTML."""

import logging

import jinja2

import kubelet_parser
import regex

CONTEXT_DEFAULT = 6
MAX_BUFFER = 5000000  # GAE has RAM limits.


def highlight(line, highlight_words):
    """
    Wraps a matched line in highlight markup.
    Given:
        line: the text of the matched line
        highlight_words: list of words to be bolded
    Returns:
        the line inside a <span class="highlight">, with every match of the
        combined word regex wrapped in a <span class="keyword">
    """
    # Join all the words that need to be bolded into one regex
    words_re = regex.combine_wordsRE(highlight_words)
    line = words_re.sub(r'<span class="keyword">\1</span>', line)
    return '<span class="highlight">%s</span>' % line


def log_html(lines, matched_lines, highlight_words, skip_fmt):
    """
    Constructs the html for the filtered log
    Given:
        lines: list of all lines in the log
        matched_lines: list of indexes into lines that have a filtered string
            in them; this list is not modified
        highlight_words: list of words to be bolded
        skip_fmt: function producing string to replace the skipped lines
    Returns:
        output: list of lines of HTML code suitable for inclusion in a <pre>
            tag, with "interesting" errors highlighted
    """
    output = []

    # Work on a copy so the caller's list is left untouched. The extra entry
    # is a sentinel one past the last line, so the loop below also emits the
    # trailing context/skip marker after the final real match.
    matches = list(matched_lines)
    matches.append(len(lines))  # sentinel value

    # Escape hatch: if we're going to generate a LOT of output, try to trim it down.
    context_lines = CONTEXT_DEFAULT
    if len(matches) > 2000:
        context_lines = 0

    last_match = None
    for match in matches:
        if last_match is not None:
            # Trailing context of the previous match, never running past the
            # current match.
            previous_end = min(match, last_match + context_lines + 1)
            output.extend(lines[last_match + 1: previous_end])
        else:
            previous_end = 0
        if match == len(lines):
            # The sentinel has no leading context of its own.
            context_lines = 0
        skip_amount = match - previous_end - context_lines
        if skip_amount > 1:
            output.append('<span class="skip" data-range="%d-%d">%s</span>' %
                          (previous_end, match - context_lines, skip_fmt(skip_amount)))
        elif skip_amount == 1:  # pointless say we skipped 1 line
            output.append(lines[previous_end])
        if match == len(lines):
            break
        # Leading context of this match, without repeating lines already
        # emitted as trailing context of the previous one.
        output.extend(lines[max(previous_end, match - context_lines): match])
        output.append(highlight(lines[match], highlight_words))
        last_match = match

    return output


def truncate(data, limit=MAX_BUFFER):
    """
    Cuts the middle out of an oversized buffer.
    Given:
        data: the log contents as a single string
        limit: maximum size to keep, split evenly between head and tail
    Returns:
        data unchanged when it fits in limit; otherwise the first and last
        limit // 2 characters, with the erased middle replaced by one
        newline per newline it contained
    """
    if len(data) <= limit:
        return data

    # If we try to process more than MAX_BUFFER, things will probably blow up.
    # Floor division keeps the slice bounds integral regardless of the
    # division semantics in effect.
    half = max(limit, 0) // 2
    # Use an explicit index for the tail: data[-half:] would be the WHOLE
    # buffer when half is 0.
    tail_start = len(data) - half
    # Erase the intermediate lines, but keep the line count consistent so
    # skip line expansion works.
    cut_newlines = data[half:tail_start].count('\n')

    # Avoid dividing by zero when reporting the overshoot for limit <= 0.
    ratio = len(data) / float(limit) if limit > 0 else float('inf')
    logging.warning('truncating buffer %.1f times too large (%d lines erased)',
                    ratio, cut_newlines)

    return ''.join([data[:half], '\n' * cut_newlines, data[tail_start:]])


def digest(data, objref_dict=None, filters=None, error_re=regex.error_re,
           skip_fmt=lambda l: '... skipping %d lines ...' % l):
    # pylint: disable=too-many-arguments
    """
    Given a build log, return a chunk of HTML code suitable for
    inclusion in a <pre> tag, with "interesting" errors highlighted.

    This is similar to the output of `grep -C4` with an appropriate regex.

    Given:
        data: the build log contents; byte strings are decoded as UTF-8
        objref_dict: passed through to kubelet_parser.parse
        filters: dict with 'Namespace', 'UID', 'pod' and 'ContainerID' keys;
            defaults to all of them empty
        error_re: regex selecting the matched lines when no UID, Namespace
            or ContainerID filter is set
        skip_fmt: function producing string to replace the skipped lines
    Returns:
        the filtered log as a single newline-joined, newline-terminated
        string of HTML
    """
    if isinstance(data, str):  # the test mocks return str instead of unicode
        data = data.decode('utf8', 'replace')
    # Escape first, so the markup added below is the only HTML in the output.
    lines = unicode(jinja2.escape(truncate(data))).split('\n')

    if filters is None:
        filters = {'Namespace': '', 'UID': '', 'pod': '', 'ContainerID': ''}

    highlight_words = regex.default_words

    if filters["pod"]:
        highlight_words = [filters["pod"]]

    if not (filters["UID"] or filters["Namespace"] or filters["ContainerID"]):
        matched_lines = [n for n, line in enumerate(lines) if error_re.search(line)]
    else:
        matched_lines, highlight_words = kubelet_parser.parse(
            lines, highlight_words, filters, objref_dict)

    output = log_html(lines, matched_lines, highlight_words, skip_fmt)
    output.append('')  # so the joined result ends with a newline

    return '\n'.join(output)


if __name__ == '__main__':
    import sys
    for f in sys.argv[1:]:
        # Close each log file promptly instead of leaking the handle.
        with open(f) as log_file:
            contents = log_file.read()
        # Parenthesised single-argument form prints the same value under the
        # print statement.
        print(digest(contents.decode('utf8')))