github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/prow/tide/search.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package tide 18 19 import ( 20 "context" 21 "fmt" 22 "time" 23 24 "k8s.io/test-infra/prow/github" 25 26 githubql "github.com/shurcooL/githubv4" 27 "github.com/sirupsen/logrus" 28 ) 29 30 type searchExecutor func(start, end time.Time) ([]PullRequest, int /*true match count*/, error) 31 32 func newSearchExecutor(ctx context.Context, ghc githubClient, log *logrus.Entry, q string) searchExecutor { 33 return func(start, end time.Time) ([]PullRequest, int, error) { 34 datedQuery := fmt.Sprintf("%s %s", q, dateToken(start, end)) 35 vars := map[string]interface{}{ 36 "query": githubql.String(datedQuery), 37 "searchCursor": (*githubql.String)(nil), 38 } 39 var totalCost, remaining int 40 var totalMatches int 41 var ret []PullRequest 42 for { 43 sq := searchQuery{} 44 if err := ghc.Query(ctx, &sq, vars); err != nil { 45 return nil, 0, fmt.Errorf("error handling query: %q, err: %v", datedQuery, err) 46 } 47 totalCost += int(sq.RateLimit.Cost) 48 remaining = int(sq.RateLimit.Remaining) 49 totalMatches = int(sq.Search.IssueCount) 50 // If the search won't return all results, abort. 51 if totalMatches > 1000 { 52 return nil, totalMatches, nil 53 } 54 for _, n := range sq.Search.Nodes { 55 ret = append(ret, n.PullRequest) 56 } 57 if !sq.Search.PageInfo.HasNextPage { 58 break 59 } 60 vars["searchCursor"] = githubql.NewString(sq.Search.PageInfo.EndCursor) 61 } 62 log.WithFields(logrus.Fields{ 63 "query": datedQuery, 64 "start": start.String(), 65 "end": start.String(), 66 }).Debugf("Query returned %d PRs and cost %d point(s). %d remaining.", len(ret), totalCost, remaining) 67 return ret, totalMatches, nil 68 } 69 } 70 71 func (q searchExecutor) search() ([]PullRequest, error) { 72 prs, _, err := q.searchRange(time.Time{}, time.Now()) 73 return prs, err 74 } 75 76 func (q searchExecutor) searchSince(t time.Time) ([]PullRequest, error) { 77 prs, _, err := q.searchRange(t, time.Now()) 78 return prs, err 79 } 80 81 func (q searchExecutor) searchRange(start, end time.Time) ([]PullRequest, int, error) { 82 // Adjust times to be after GitHub was founded to avoid querying empty time 83 // ranges. 84 if start.Before(github.FoundingYear) { 85 start = github.FoundingYear 86 } 87 if end.Before(github.FoundingYear) { 88 end = github.FoundingYear 89 } 90 91 prs, count, err := q(start, end) 92 if err != nil { 93 return nil, 0, err 94 } 95 96 if count <= 1000 { 97 // The search returned all the results for the query. 98 return prs, len(prs), nil 99 } 100 // The query returned too many results, we need to partition it. 101 prs, err = q.partitionSearchRange(start, end, count) 102 return prs, len(prs), err 103 } 104 105 func (q searchExecutor) partitionSearchRange(start, end time.Time, count int) ([]PullRequest, error) { 106 partition := partitionTime(start, end, count, 900) 107 // Search right side... 108 rPRs, rCount, err := q.searchRange(partition, end) 109 if err != nil { 110 return nil, err 111 } 112 113 // Search left side... 114 // For the left side we can deduce the count in advance. 115 lCount := count - rCount 116 // If the count is too large we can skip the initial search and go straight to 117 // partitioning to save an API token. 118 var lPRs []PullRequest 119 if lCount <= 1000 { 120 lPRs, _, err = q.searchRange(start, partition) 121 } else { 122 lPRs, err = q.partitionSearchRange(start, partition, lCount) 123 } 124 if err != nil { 125 return nil, err 126 } 127 128 return append(lPRs, rPRs...), nil 129 } 130 131 func partitionTime(start, end time.Time, count, goalSize int) time.Time { 132 duration := end.Sub(start) 133 if count < goalSize*2 { 134 // Choose the midpoint. 135 return start.Add(duration / 2) 136 } 137 // Choose the point that will make the partitionTime->end range contain goalSize 138 // many results assuming a uniform distribution over time. 139 // Use floats to avoid duration overflow. 140 // -> end - (duration * goalSize / count) 141 diff := time.Duration(-float64(duration) * (float64(goalSize) / float64(count))) 142 return end.Add(diff) 143 } 144 145 // dateToken generates a GitHub search query token for the specified date range. 146 // See: https://help.github.com/articles/understanding-the-search-syntax/#query-for-dates 147 func dateToken(start, end time.Time) string { 148 // Github's GraphQL API silently fails if you provide it with an invalid time 149 // string. 150 // Dates before 1970 (unix epoch) are considered invalid. 151 startString, endString := "*", "*" 152 if start.Year() >= 1970 { 153 startString = start.Format(github.SearchTimeFormat) 154 } 155 if end.Year() >= 1970 { 156 endString = end.Format(github.SearchTimeFormat) 157 } 158 return fmt.Sprintf("updated:%s..%s", startString, endString) 159 }