go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/gerrit/poller/partition.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package poller 16 17 import ( 18 "crypto/sha256" 19 "encoding/binary" 20 "sort" 21 "strings" 22 23 "go.chromium.org/luci/common/data/stringset" 24 25 "go.chromium.org/luci/cv/internal/configs/prjcfg" 26 ) 27 28 const ( 29 // maxReposPerQuery is the maximum number of Gerrit projects that will be 30 // polled in 1 OR-query. 31 // 32 // Gerrit-on-googlesource has a 100 term limit in query after expansion, 33 // but some query terms are actually composite. 34 maxReposPerQuery = 20 35 36 // minReposPerPrefixQuery is the minimum number of Gerrit projects with shared 37 // prefix to poll using shared prefix query. 38 // 39 // Because prefix may match other projects not actually relevant to the LUCI 40 // project, this constant should not be too low. 41 // 42 // TODO(tandrii): consider querying Gerrit host to see how many projects 43 // actually match this prefix. 44 minReposPerPrefixQuery = 20 45 ) 46 47 // partitionConfig partitions LUCI Project config into minimal number of 48 // queries for efficient querying. 49 func partitionConfig(cgs []*prjcfg.ConfigGroup) []*QueryState { 50 // 1 LUCI project typically watches 1-2 GoB hosts. 51 hosts := make([]string, 0, 2) 52 repos := make(map[string]stringset.Set, 2) 53 for _, cg := range cgs { 54 for _, g := range cg.Content.GetGerrit() { 55 host := prjcfg.GerritHost(g) 56 if repos[host] == nil { 57 hosts = append(hosts, host) 58 repos[host] = stringset.New(len(g.GetProjects())) 59 } 60 for _, pr := range g.GetProjects() { 61 repos[host].Add(pr.GetName()) 62 } 63 } 64 } 65 sort.Strings(hosts) 66 queries := make([]*QueryState, 0, len(hosts)) 67 for _, host := range hosts { 68 queries = append(queries, partitionHostRepos(host, repos[host].ToSlice(), maxReposPerQuery)...) 69 } 70 return queries 71 } 72 73 // partitionHostRepos partitions repos of the same Gerrit host into queries. 74 // Mutates the passed repos slice. 75 func partitionHostRepos(host string, repos []string, maxReposPerQuery int) []*QueryState { 76 // Heuristic targeting ChromeOS like structure with lots of repos under 77 // chromiumos/ prefix. 78 byPrefix := make(map[string][]string, 2) 79 for _, r := range repos { 80 prefix := strings.SplitN(r, "/", 2)[0] 81 byPrefix[prefix] = append(byPrefix[prefix], r) 82 } 83 prefixes := make([]string, len(byPrefix)) 84 for prefix := range byPrefix { 85 prefixes = append(prefixes, prefix) 86 } 87 sort.Strings(prefixes) 88 89 queries := make([]*QueryState, 0, 1) 90 remainingRepos := repos[:0] // re-use the slice. 91 for _, prefix := range prefixes { 92 if shared := byPrefix[prefix]; len(shared) < minReposPerPrefixQuery { 93 remainingRepos = append(remainingRepos, shared...) 94 } else { 95 queries = append(queries, &QueryState{ 96 Host: host, 97 CommonProjectPrefix: prefix, 98 }) 99 } 100 } 101 if len(remainingRepos) == 0 { 102 return queries 103 } 104 105 // Split remainingRepos into queries minimizing max of repos per query. 106 // TODO(crbug/1163177): take ref_regexp into account, since most LUCI projects 107 // watching >1 project use the same ref(s) for each, which in turn allows to 108 // specify `branch:` search term for Gerrit. 109 // Note that rounding up positive int division is (x-1)/y + 1. 110 neededQueries := (len(remainingRepos)-1)/maxReposPerQuery + 1 111 maxPerQuery := (len(remainingRepos)-1)/neededQueries + 1 112 sort.Strings(remainingRepos) 113 for { 114 q := &QueryState{Host: host} 115 switch l := len(remainingRepos); { 116 case l == 0: 117 return queries 118 case l <= maxPerQuery: 119 q.OrProjects = remainingRepos 120 default: 121 q.OrProjects = remainingRepos[:maxPerQuery] 122 } 123 queries = append(queries, q) 124 remainingRepos = remainingRepos[len(q.GetOrProjects()):] 125 } 126 } 127 128 func reuseIfPossible(old, proposed []*QueryState) (use, discarded []*QueryState) { 129 // Crypto quality hash is used to to infer equality. 130 // 131 // Each string is emitted as (<len>, string). 132 // List of OrProjects is prefixed by its length. 133 hash := func(s *QueryState) string { 134 h := sha256.New() 135 136 buf := make([]byte, 10) // varint uint64 will definitely fit. 137 writeInt := func(l int) { 138 n := binary.PutUvarint(buf, uint64(l)) 139 h.Write(buf[:n]) 140 } 141 142 writeStr := func(s string) { 143 writeInt(len(s)) 144 h.Write([]byte(s)) 145 } 146 147 writeStr(s.GetHost()) 148 writeStr(s.GetCommonProjectPrefix()) 149 writeInt(len(s.GetOrProjects())) 150 for _, p := range s.GetOrProjects() { 151 writeStr(p) 152 } 153 return string(h.Sum(nil)) 154 } 155 156 m := make(map[string]*QueryState, len(old)) 157 for _, o := range old { 158 m[hash(o)] = o 159 } 160 for _, p := range proposed { 161 h := hash(p) 162 if o, exists := m[h]; exists { 163 use = append(use, o) 164 delete(m, h) 165 } else { 166 use = append(use, p) 167 } 168 } 169 for _, o := range m { 170 discarded = append(discarded, o) 171 } 172 return 173 }