github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/gubernator/github/handlers.py (about)

     1  # Copyright 2016 The Kubernetes Authors.
     2  #
     3  # Licensed under the Apache License, Version 2.0 (the "License");
     4  # you may not use this file except in compliance with the License.
     5  # You may obtain a copy of the License at
     6  #
     7  #     http://www.apache.org/licenses/LICENSE-2.0
     8  #
     9  # Unless required by applicable law or agreed to in writing, software
    10  # distributed under the License is distributed on an "AS IS" BASIS,
    11  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  # See the License for the specific language governing permissions and
    13  # limitations under the License.
    14  
    15  import cgi
    16  import datetime
    17  import hashlib
    18  import hmac
    19  import logging
    20  import json
    21  import traceback
    22  
    23  import webapp2
    24  from webapp2_extras import security
    25  
    26  from google.appengine.api.runtime import memory_usage
    27  from google.appengine.datastore import datastore_query
    28  from google.appengine.ext import deferred
    29  
    30  import classifier
    31  import models
    32  import secrets
    33  
    34  
    35  _webhook_secret = None
    36  def get_webhook_secret():
    37      global _webhook_secret  # pylint: disable=global-statement
    38      if not _webhook_secret:
    39          try:
    40              _webhook_secret = str(secrets.get('github_webhook_secret', per_host=False))
    41          except KeyError:
    42              logging.exception('unable to load webhook secret')
    43      return _webhook_secret
    44  
    45  
    46  def make_signature(body):
    47      hmac_instance = hmac.HMAC(get_webhook_secret(), body, hashlib.sha1)
    48      return 'sha1=' + hmac_instance.hexdigest()
    49  
    50  
    51  class GithubHandler(webapp2.RequestHandler):
    52      """
    53      Handle POSTs delivered using GitHub's webhook interface. Posts are
    54      authenticated with HMAC signatures and a shared secret.
    55  
    56      Each event is saved to a database, and can trigger additional
    57      processing.
    58      """
    59      def post(self):
    60          event = self.request.headers.get('x-github-event')
    61          signature = self.request.headers.get('x-hub-signature', '')
    62          body = self.request.body
    63  
    64          expected_signature = make_signature(body)
    65          if not security.compare_hashes(signature, expected_signature):
    66              logging.error('webhook failed signature check')
    67              self.abort(400)
    68  
    69          body_json = json.loads(body)
    70          repo = body_json.get('repository', {}).get('full_name')
    71          number = None
    72          if 'pull_request' in body_json:
    73              number = body_json['pull_request']['number']
    74          elif 'issue' in body_json:
    75              number = body_json['issue']['number']
    76  
    77          parent = None
    78          if number:
    79              parent = models.GithubResource.make_key(repo, number)
    80  
    81          kwargs = {}
    82          timestamp = self.request.headers.get('x-timestamp')
    83          if timestamp is not None:
    84              kwargs['timestamp'] = datetime.datetime.strptime(
    85                  timestamp, '%Y-%m-%d %H:%M:%S.%f')
    86  
    87          webhook = models.GithubWebhookRaw(
    88              parent=parent,
    89              repo=repo, number=number, event=event, body=body, **kwargs)
    90          webhook.put()
    91  
    92          # Defer digest updates, so they'll retry on failure.
    93          if event == 'status':
    94              status = models.GHStatus.from_json(body_json)
    95              models.save_if_newer(status)
    96              query = models.GHIssueDigest.find_head(repo, status.sha)
    97              for issue in query.fetch():
    98                  deferred.defer(update_issue_digest, issue.repo, issue.number)
    99  
   100          if number:
   101              deferred.defer(update_issue_digest, repo, number)
   102  
   103  
   104  def update_issue_digest(repo, number, always_put=False):
   105      digest = models.GHIssueDigest.make(repo, number,
   106          *classifier.classify_issue(repo, number))
   107      if always_put:
   108          digest.put()
   109      else:
   110          models.save_if_newer(digest)
   111  
   112  
   113  class BaseHandler(webapp2.RequestHandler):
   114      def dispatch(self):
   115          # Eh, this is less work than making all the debug pages escape properly.
   116          # No resources allowed except for inline CSS, no iframing of content.
   117          self.response.headers['Content-Security-Policy'] = \
   118              "default-src none; style-src 'unsafe-inline'; frame-ancestors none"
   119          super(BaseHandler, self).dispatch()
   120  
   121  
   122  class Events(BaseHandler):
   123      """
   124      Perform input/output on a series of webhook events from the datastore, for
   125      debugging purposes.
   126      """
   127      def get(self):
   128          cursor = datastore_query.Cursor(urlsafe=self.request.get('cursor'))
   129          repo = self.request.get('repo')
   130          number = int(self.request.get('number', 0)) or None
   131          count = int(self.request.get('count', 500))
   132          if repo is not None and number is not None:
   133              q = models.GithubWebhookRaw.query(
   134                  models.GithubWebhookRaw.repo == repo,
   135                  models.GithubWebhookRaw.number == number)
   136          else:
   137              q = models.GithubWebhookRaw.query()
   138          q = q.order(models.GithubWebhookRaw.timestamp)
   139          events, next_cursor, more = q.fetch_page(count, start_cursor=cursor)
   140          out = []
   141          for event in events:
   142              out.append({'repo': event.repo, 'event': event.event,
   143                          'timestamp': str(event.timestamp),
   144                          'body': json.loads(event.body)})
   145          resp = {'next': more and next_cursor.urlsafe(), 'calls': out}
   146          self.response.headers['content-type'] = 'text/json'
   147          self.response.write(json.dumps(resp, indent=4, sort_keys=True))
   148  
   149  
   150  class Status(BaseHandler):
   151      def get(self):
   152          repo = self.request.get('repo')
   153          sha = self.request.get('sha')
   154          if not repo or not sha:
   155              self.abort(403)
   156              return
   157          results = models.GHStatus.query_for_sha(repo, sha)
   158          self.response.write('<table>')
   159          for res in results:
   160              self.response.write('<tr><td>%s<td>%s<td><a href="%s">%s</a>\n' %
   161                  (res.context, res.state, res.target_url, res.description))
   162  
   163  
   164  class Timeline(BaseHandler):
   165      """
   166      Render all the information in the datastore about a particular issue.
   167  
   168      This is used for debugging and investigations.
   169      """
   170      def emit_classified(self, repo, number):
   171          try:
   172              self.response.write('<h3>Classifier Output</h3>')
   173              ret = classifier.classify_issue(repo, number)
   174              self.response.write('<ul><li>pr: %s<li>open: %s<li>involved: %s'
   175                  % tuple(ret[:3]))
   176              self.response.write('<li>last_event_timestamp: %s' % ret[4])
   177              self.response.write('<li>payload len: %d' %len(json.dumps(ret[3])))
   178              self.response.write('<pre>%s</pre></ul>' % cgi.escape(
   179                  json.dumps(ret[3], indent=2, sort_keys=True)))
   180          except BaseException:
   181              self.response.write('<pre>%s</pre>' % traceback.format_exc())
   182  
   183      def emit_events(self, repo, number):
   184          ancestor = models.GithubResource.make_key(repo, number)
   185          events = list(models.GithubWebhookRaw.query(ancestor=ancestor)
   186              .order(models.GithubWebhookRaw.timestamp))
   187  
   188          self.response.write('<h3>Distilled Events</h3>')
   189          self.response.write('<pre>')
   190          event_pairs = [event.to_tuple() for event in events]
   191          for ev in classifier.distill_events(event_pairs):
   192              self.response.write(cgi.escape('%s, %s %s\n' % ev))
   193          self.response.write('</pre>')
   194  
   195          self.response.write('<h3>%d Raw Events</h3>' % (len(events)))
   196          self.response.write('<table border=2>')
   197          self.response.write('<tr><th>Timestamp<th>Event<th>Action<th>Sender<th>Body</tr>')
   198          merged = {}
   199          for event in events:
   200              body_json = json.loads(event.body)
   201              models.shrink(body_json)
   202              if 'issue' in body_json:
   203                  merged.update(body_json['issue'])
   204              elif 'pull_request' in body_json:
   205                  merged.update(body_json['pull_request'])
   206              body = json.dumps(body_json, indent=2)
   207              action = body_json.get('action')
   208              sender = body_json.get('sender', {}).get('login')
   209              self.response.write('<tr><td>%s\n' % '<td>'.join(str(x) for x in
   210                  [event.timestamp, event.event, action, sender,
   211                   '<pre>' + cgi.escape(body)]))
   212          return merged
   213  
   214      def get(self):
   215          repo = self.request.get('repo')
   216          number = self.request.get('number')
   217          if self.request.get('format') == 'json':
   218              ancestor = models.GithubResource.make_key(repo, number)
   219              events = list(models.GithubWebhookRaw.query(ancestor=ancestor))
   220              self.response.headers['content-type'] = 'application/json'
   221              self.response.write(json.dumps([e.body for e in events], indent=True))
   222              return
   223          self.response.write(
   224              '<style>td pre{max-height:200px;max-width:800px;overflow:scroll}</style>')
   225          self.response.write('<p>Memory: %s' % memory_usage().current())
   226          self.emit_classified(repo, number)
   227          self.response.write('<p>Memory: %s' % memory_usage().current())
   228          if self.request.get('classify_only'):
   229              return
   230          merged = self.emit_events(repo, number)
   231          self.response.write('<p>Memory: %s' % memory_usage().current())
   232          if 'head' in merged:
   233              sha = merged['head']['sha']
   234              results = models.GHStatus.query_for_sha(repo, sha)
   235              self.response.write('</table><table>')
   236              for res in results:
   237                  self.response.write('<tr><td>%s<td>%s<td><a href="%s">%s</a>\n'
   238                     % (res.context, res.state, res.target_url, res.description))
   239          models.shrink(merged)
   240          self.response.write('</table><pre>%s</pre>' % cgi.escape(
   241              json.dumps(merged, indent=2, sort_keys=True)))
   242          self.response.write('<p>Memory: %s' % memory_usage().current())