github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/gubernator/log_parser.py (about)

#!/usr/bin/env python
# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import jinja2

import kubelet_parser
import regex

CONTEXT_DEFAULT = 6
MAX_BUFFER = 5000000  # GAE has RAM limits.


def highlight(line, highlight_words):
    # Join all the words that need to be bolded into one regex
    words_re = regex.combine_wordsRE(highlight_words)
    line = words_re.sub(r'<span class="keyword">\1</span>', line)
    return '<span class="highlight">%s</span>' % line
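
# A rough sketch of what highlight() returns, assuming combine_wordsRE
# builds a single alternation with one capturing group (e.g. r'\b(error)\b'
# for ['error']):
#
#   >>> highlight('1 error occurred', ['error'])
#   '<span class="highlight">1 <span class="keyword">error</span> occurred</span>'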


def log_html(lines, matched_lines, highlight_words, skip_fmt):
    """
    Constructs the HTML for the filtered log.
    Given:
        lines: list of all lines in the log
        matched_lines: list of indices of lines that matched a filter
        highlight_words: list of words to be bolded
        skip_fmt: function producing the string that replaces skipped lines
    Returns:
        output: list of lines of HTML code suitable for inclusion in a
        <pre> tag, with "interesting" errors highlighted
    """
    output = []

    matched_lines.append(len(lines))  # sentinel value

    # Escape hatch: if we're going to generate a LOT of output, drop the
    # context lines around each match entirely.
    context_lines = CONTEXT_DEFAULT
    if len(matched_lines) > 2000:
        context_lines = 0

    last_match = None
    for match in matched_lines:
        if last_match is not None:
            previous_end = min(match, last_match + context_lines + 1)
            output.extend(lines[last_match + 1: previous_end])
        else:
            previous_end = 0
        if match == len(lines):
            context_lines = 0
        skip_amount = match - previous_end - context_lines
        if skip_amount > 1:
            output.append('<span class="skip" data-range="%d-%d">%s</span>' %
                          (previous_end, match - context_lines, skip_fmt(skip_amount)))
        elif skip_amount == 1:  # it's pointless to say we skipped 1 line
            output.append(lines[previous_end])
        if match == len(lines):
            break
        output.extend(lines[max(previous_end, match - context_lines): match])
        output.append(highlight(lines[match], highlight_words))
        last_match = match

    return output
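
# A worked sketch of the loop above (the numbers are illustrative): for a
# 100-line log with matched_lines = [50] and the default context of 6, the
# sentinel makes matched_lines [50, 100], and the output becomes:
#   - a skip span for the first 44 lines,
#   - context lines 44-49,
#   - the highlighted line 50,
#   - context lines 51-56,
#   - a final skip span for the remaining 43 lines.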


def truncate(data, limit=MAX_BUFFER):
    if len(data) <= limit:
        return data

    # If we try to process more than MAX_BUFFER, things will probably blow up.
    half = limit / 2
    # Erase the intermediate lines, but keep the line count consistent so
    # skip line expansion works.
    cut_newlines = data[half:-half].count('\n')

    logging.warning('truncating buffer %.1f times too large (%d lines erased)',
                    len(data) / float(limit), cut_newlines)

    return ''.join([data[:half], '\n' * cut_newlines, data[-half:]])
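
# A quick sketch of truncate() with an artificially tiny limit: the middle
# of the buffer is dropped, but one '\n' per erased line keeps line numbers
# stable so skip-span expansion still lines up:
#
#   >>> truncate('aaaa\nbbbb\ncccc\ndddd', limit=8)
#   'aaaa\n\n\ndddd'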


def digest(data, objref_dict=None, filters=None, error_re=regex.error_re,
    skip_fmt=lambda l: '... skipping %d lines ...' % l):
    # pylint: disable=too-many-arguments
    """
    Given a build log, return a chunk of HTML code suitable for
    inclusion in a <pre> tag, with "interesting" errors highlighted.

    This is similar to the output of `grep -C4` with an appropriate regex.
    """
    if isinstance(data, str):  # the test mocks return str instead of unicode
        data = data.decode('utf8', 'replace')
    lines = unicode(jinja2.escape(truncate(data))).split('\n')

    if filters is None:
        filters = {'Namespace': '', 'UID': '', 'pod': '', 'ContainerID': ''}

    highlight_words = regex.default_words

    if filters["pod"]:
        highlight_words = [filters["pod"]]

    if not (filters["UID"] or filters["Namespace"] or filters["ContainerID"]):
        matched_lines = [n for n, line in enumerate(lines) if error_re.search(line)]
    else:
        matched_lines, highlight_words = kubelet_parser.parse(lines,
            highlight_words, filters, objref_dict)

    output = log_html(lines, matched_lines, highlight_words, skip_fmt)
    output.append('')

    return '\n'.join(output)
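
# Typical use, as a sketch (the sample log line, and the claim that 'FAIL'
# matches regex.error_re, are assumptions): with no filters set, any line
# matching error_re is highlighted with its surrounding context kept, and
# everything else is collapsed into expandable skip spans:
#
#   >>> print digest(u'... lots of output ...\nFAIL: TestFoo\n...')
#   # -> HTML fragment with 'FAIL: TestFoo' wrapped in highlight/keyword spans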


if __name__ == '__main__':
    import sys
    for f in sys.argv[1:]:
        print digest(open(f).read().decode('utf8'))
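
# Run directly, this prints the highlighted HTML fragment for each log file
# named on the command line, e.g. (the file name is hypothetical):
#   python log_parser.py build-log.txt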