github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/prow/tide/search.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package tide
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	"k8s.io/test-infra/prow/github"
    25  
    26  	githubql "github.com/shurcooL/githubv4"
    27  	"github.com/sirupsen/logrus"
    28  )
    29  
    30  type searchExecutor func(start, end time.Time) ([]PullRequest, int /*true match count*/, error)
    31  
    32  func newSearchExecutor(ctx context.Context, ghc githubClient, log *logrus.Entry, q string) searchExecutor {
    33  	return func(start, end time.Time) ([]PullRequest, int, error) {
    34  		datedQuery := fmt.Sprintf("%s %s", q, dateToken(start, end))
    35  		vars := map[string]interface{}{
    36  			"query":        githubql.String(datedQuery),
    37  			"searchCursor": (*githubql.String)(nil),
    38  		}
    39  		var totalCost, remaining int
    40  		var totalMatches int
    41  		var ret []PullRequest
    42  		for {
    43  			sq := searchQuery{}
    44  			if err := ghc.Query(ctx, &sq, vars); err != nil {
    45  				return nil, 0, fmt.Errorf("error handling query: %q, err: %v", datedQuery, err)
    46  			}
    47  			totalCost += int(sq.RateLimit.Cost)
    48  			remaining = int(sq.RateLimit.Remaining)
    49  			totalMatches = int(sq.Search.IssueCount)
    50  			// If the search won't return all results, abort.
    51  			if totalMatches > 1000 {
    52  				return nil, totalMatches, nil
    53  			}
    54  			for _, n := range sq.Search.Nodes {
    55  				ret = append(ret, n.PullRequest)
    56  			}
    57  			if !sq.Search.PageInfo.HasNextPage {
    58  				break
    59  			}
    60  			vars["searchCursor"] = githubql.NewString(sq.Search.PageInfo.EndCursor)
    61  		}
    62  		log.WithFields(logrus.Fields{
    63  			"query": datedQuery,
    64  			"start": start.String(),
    65  			"end":   start.String(),
    66  		}).Debugf("Query returned %d PRs and cost %d point(s). %d remaining.", len(ret), totalCost, remaining)
    67  		return ret, totalMatches, nil
    68  	}
    69  }
    70  
    71  func (q searchExecutor) search() ([]PullRequest, error) {
    72  	prs, _, err := q.searchRange(time.Time{}, time.Now())
    73  	return prs, err
    74  }
    75  
    76  func (q searchExecutor) searchSince(t time.Time) ([]PullRequest, error) {
    77  	prs, _, err := q.searchRange(t, time.Now())
    78  	return prs, err
    79  }
    80  
    81  func (q searchExecutor) searchRange(start, end time.Time) ([]PullRequest, int, error) {
    82  	// Adjust times to be after GitHub was founded to avoid querying empty time
    83  	// ranges.
    84  	if start.Before(github.FoundingYear) {
    85  		start = github.FoundingYear
    86  	}
    87  	if end.Before(github.FoundingYear) {
    88  		end = github.FoundingYear
    89  	}
    90  
    91  	prs, count, err := q(start, end)
    92  	if err != nil {
    93  		return nil, 0, err
    94  	}
    95  
    96  	if count <= 1000 {
    97  		// The search returned all the results for the query.
    98  		return prs, len(prs), nil
    99  	}
   100  	// The query returned too many results, we need to partition it.
   101  	prs, err = q.partitionSearchRange(start, end, count)
   102  	return prs, len(prs), err
   103  }
   104  
   105  func (q searchExecutor) partitionSearchRange(start, end time.Time, count int) ([]PullRequest, error) {
   106  	partition := partitionTime(start, end, count, 900)
   107  	// Search right side...
   108  	rPRs, rCount, err := q.searchRange(partition, end)
   109  	if err != nil {
   110  		return nil, err
   111  	}
   112  
   113  	// Search left side...
   114  	// For the left side we can deduce the count in advance.
   115  	lCount := count - rCount
   116  	// If the count is too large we can skip the initial search and go straight to
   117  	// partitioning to save an API token.
   118  	var lPRs []PullRequest
   119  	if lCount <= 1000 {
   120  		lPRs, _, err = q.searchRange(start, partition)
   121  	} else {
   122  		lPRs, err = q.partitionSearchRange(start, partition, lCount)
   123  	}
   124  	if err != nil {
   125  		return nil, err
   126  	}
   127  
   128  	return append(lPRs, rPRs...), nil
   129  }
   130  
   131  func partitionTime(start, end time.Time, count, goalSize int) time.Time {
   132  	duration := end.Sub(start)
   133  	if count < goalSize*2 {
   134  		// Choose the midpoint.
   135  		return start.Add(duration / 2)
   136  	}
   137  	// Choose the point that will make the partitionTime->end range contain goalSize
   138  	// many results assuming a uniform distribution over time.
   139  	// Use floats to avoid duration overflow.
   140  	// ->    end - (duration * goalSize / count)
   141  	diff := time.Duration(-float64(duration) * (float64(goalSize) / float64(count)))
   142  	return end.Add(diff)
   143  }
   144  
   145  // dateToken generates a GitHub search query token for the specified date range.
   146  // See: https://help.github.com/articles/understanding-the-search-syntax/#query-for-dates
   147  func dateToken(start, end time.Time) string {
   148  	// Github's GraphQL API silently fails if you provide it with an invalid time
   149  	// string.
   150  	// Dates before 1970 (unix epoch) are considered invalid.
   151  	startString, endString := "*", "*"
   152  	if start.Year() >= 1970 {
   153  		startString = start.Format(github.SearchTimeFormat)
   154  	}
   155  	if end.Year() >= 1970 {
   156  		endString = end.Format(github.SearchTimeFormat)
   157  	}
   158  	return fmt.Sprintf("updated:%s..%s", startString, endString)
   159  }