go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/gerrit/poller/partition.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package poller
    16  
    17  import (
    18  	"crypto/sha256"
    19  	"encoding/binary"
    20  	"sort"
    21  	"strings"
    22  
    23  	"go.chromium.org/luci/common/data/stringset"
    24  
    25  	"go.chromium.org/luci/cv/internal/configs/prjcfg"
    26  )
    27  
    28  const (
    29  	// maxReposPerQuery is the maximum number of Gerrit projects that will be
    30  	// polled in 1 OR-query.
    31  	//
    32  	// Gerrit-on-googlesource has a 100 term limit in query after expansion,
    33  	// but some query terms are actually composite.
    34  	maxReposPerQuery = 20
    35  
    36  	// minReposPerPrefixQuery is the minimum number of Gerrit projects with shared
    37  	// prefix to poll using shared prefix query.
    38  	//
    39  	// Because prefix may match other projects not actually relevant to the LUCI
    40  	// project, this constant should not be too low.
    41  	//
    42  	// TODO(tandrii): consider querying Gerrit host to see how many projects
    43  	// actually match this prefix.
    44  	minReposPerPrefixQuery = 20
    45  )
    46  
    47  // partitionConfig partitions LUCI Project config into minimal number of
    48  // queries for efficient querying.
    49  func partitionConfig(cgs []*prjcfg.ConfigGroup) []*QueryState {
    50  	// 1 LUCI project typically watches 1-2 GoB hosts.
    51  	hosts := make([]string, 0, 2)
    52  	repos := make(map[string]stringset.Set, 2)
    53  	for _, cg := range cgs {
    54  		for _, g := range cg.Content.GetGerrit() {
    55  			host := prjcfg.GerritHost(g)
    56  			if repos[host] == nil {
    57  				hosts = append(hosts, host)
    58  				repos[host] = stringset.New(len(g.GetProjects()))
    59  			}
    60  			for _, pr := range g.GetProjects() {
    61  				repos[host].Add(pr.GetName())
    62  			}
    63  		}
    64  	}
    65  	sort.Strings(hosts)
    66  	queries := make([]*QueryState, 0, len(hosts))
    67  	for _, host := range hosts {
    68  		queries = append(queries, partitionHostRepos(host, repos[host].ToSlice(), maxReposPerQuery)...)
    69  	}
    70  	return queries
    71  }
    72  
    73  // partitionHostRepos partitions repos of the same Gerrit host into queries.
    74  // Mutates the passed repos slice.
    75  func partitionHostRepos(host string, repos []string, maxReposPerQuery int) []*QueryState {
    76  	// Heuristic targeting ChromeOS like structure with lots of repos under
    77  	// chromiumos/ prefix.
    78  	byPrefix := make(map[string][]string, 2)
    79  	for _, r := range repos {
    80  		prefix := strings.SplitN(r, "/", 2)[0]
    81  		byPrefix[prefix] = append(byPrefix[prefix], r)
    82  	}
    83  	prefixes := make([]string, len(byPrefix))
    84  	for prefix := range byPrefix {
    85  		prefixes = append(prefixes, prefix)
    86  	}
    87  	sort.Strings(prefixes)
    88  
    89  	queries := make([]*QueryState, 0, 1)
    90  	remainingRepos := repos[:0] // re-use the slice.
    91  	for _, prefix := range prefixes {
    92  		if shared := byPrefix[prefix]; len(shared) < minReposPerPrefixQuery {
    93  			remainingRepos = append(remainingRepos, shared...)
    94  		} else {
    95  			queries = append(queries, &QueryState{
    96  				Host:                host,
    97  				CommonProjectPrefix: prefix,
    98  			})
    99  		}
   100  	}
   101  	if len(remainingRepos) == 0 {
   102  		return queries
   103  	}
   104  
   105  	// Split remainingRepos into queries minimizing max of repos per query.
   106  	// TODO(crbug/1163177): take ref_regexp into account, since most LUCI projects
   107  	// watching >1 project use the same ref(s) for each, which in turn allows to
   108  	// specify `branch:` search term for Gerrit.
   109  	// Note that rounding up positive int division is (x-1)/y + 1.
   110  	neededQueries := (len(remainingRepos)-1)/maxReposPerQuery + 1
   111  	maxPerQuery := (len(remainingRepos)-1)/neededQueries + 1
   112  	sort.Strings(remainingRepos)
   113  	for {
   114  		q := &QueryState{Host: host}
   115  		switch l := len(remainingRepos); {
   116  		case l == 0:
   117  			return queries
   118  		case l <= maxPerQuery:
   119  			q.OrProjects = remainingRepos
   120  		default:
   121  			q.OrProjects = remainingRepos[:maxPerQuery]
   122  		}
   123  		queries = append(queries, q)
   124  		remainingRepos = remainingRepos[len(q.GetOrProjects()):]
   125  	}
   126  }
   127  
   128  func reuseIfPossible(old, proposed []*QueryState) (use, discarded []*QueryState) {
   129  	// Crypto quality hash is used to to infer equality.
   130  	//
   131  	// Each string is emitted as (<len>, string).
   132  	// List of OrProjects is prefixed by its length.
   133  	hash := func(s *QueryState) string {
   134  		h := sha256.New()
   135  
   136  		buf := make([]byte, 10) // varint uint64 will definitely fit.
   137  		writeInt := func(l int) {
   138  			n := binary.PutUvarint(buf, uint64(l))
   139  			h.Write(buf[:n])
   140  		}
   141  
   142  		writeStr := func(s string) {
   143  			writeInt(len(s))
   144  			h.Write([]byte(s))
   145  		}
   146  
   147  		writeStr(s.GetHost())
   148  		writeStr(s.GetCommonProjectPrefix())
   149  		writeInt(len(s.GetOrProjects()))
   150  		for _, p := range s.GetOrProjects() {
   151  			writeStr(p)
   152  		}
   153  		return string(h.Sum(nil))
   154  	}
   155  
   156  	m := make(map[string]*QueryState, len(old))
   157  	for _, o := range old {
   158  		m[hash(o)] = o
   159  	}
   160  	for _, p := range proposed {
   161  		h := hash(p)
   162  		if o, exists := m[h]; exists {
   163  			use = append(use, o)
   164  			delete(m, h)
   165  		} else {
   166  			use = append(use, p)
   167  		}
   168  	}
   169  	for _, o := range m {
   170  		discarded = append(discarded, o)
   171  	}
   172  	return
   173  }