github.com/yrj2011/jx-test-infra@v0.0.0-20190529031832-7a2065ee98eb/experiment/find_developers.py (about)

     1  #!/usr/bin/env python
     2  
     3  # Copyright 2017 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # Need to figure out why this only fails on travis
    18  # pylint: disable=bad-continuation
    19  
    20  
    21  """Selects a random sample of kubernetes developers."""
    22  
    23  import json
    24  import os
    25  import random
    26  import sys
    27  
    28  import requests
    29  
    30  def download_content():
    31      """Downloads contributor data from github."""
    32      resp = requests.get('https://api.github.com/repos/kubernetes/kubernetes/stats/contributors')
    33      resp.raise_for_status()
    34      data = resp.content
    35      return data
    36  
    37  
    38  def load_content(data):
    39      """Parse the json response."""
    40      users = [User(b) for b in json.loads(data)]
    41      return users
    42  
    43  
    44  class User(object):  # pylint: disable=too-few-public-methods
    45      """Store .user and number of .total and .recent commits."""
    46      def __init__(self, blob):
    47          self.user = blob['author']['login']
    48          weeks = blob['weeks']
    49          self.recent = sum(k['c'] for k in weeks[-12:])
    50          self.total = sum(k['c'] for k in weeks)
    51  
    52      def __cmp__(self, other):
    53          return cmp((self.recent, self.total, self.user), (other.recent, other.total, other.user))
    54  
    55  
    56  def find_users(users, num, top, middle, bottom):
    57      """Selects num users from top, middle, bottom thirds with specified biases."""
    58      total = len(users)
    59      if num >= total:
    60          return users
    61      third = int(total/3.0)
    62      # pylint: disable=invalid-name
    63      p3 = random.sample(users[:third], int(num * bottom))
    64      p5 = random.sample(users[third:-third], int(num * middle))
    65      p7 = random.sample(users[-third:], int(num * top))
    66      # pylint: enable=invalid-name
    67      have = []
    68      have.extend(p3)
    69      have.extend(p5)
    70      have.extend(p7)
    71      if len(have) < num:
    72          missing = num - len(have)
    73          remaining = [u for u in users if u not in have]
    74          extra = random.sample(remaining, missing)
    75          have.extend(extra)
    76      return have
    77  
    78  
    79  def main(path=None, num=35, top=0.6, middle=0.2, bottom=0.2):
    80      """Select users to survey."""
    81      if not path:
    82          data = download_content()
    83      else:
    84          with open(os.path.expanduser(path)) as fp:
    85              data = fp.read()
    86      users = sorted(load_content(data))
    87      for user in find_users(users, num, top, middle, bottom):
    88          print '%s (%d recent commits, %d total)' % (user.user, user.recent, user.total)
    89  
    90  
# Script entry point: command-line arguments are forwarded positionally to
# main() as (path, num, top, middle, bottom); any that are omitted keep
# main()'s defaults.
if __name__ == '__main__':
    main(*sys.argv[1:])