go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/model/filters.go (about) 1 // Copyright 2024 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package model 16 17 import ( 18 "fmt" 19 "sort" 20 "strings" 21 22 "go.chromium.org/luci/common/data/stringset" 23 "go.chromium.org/luci/common/errors" 24 "go.chromium.org/luci/gae/service/datastore" 25 26 apipb "go.chromium.org/luci/swarming/proto/api_v2" 27 ) 28 29 // SplitMode is a parameter for SplitForQuery and Apply methods. 30 type SplitMode int 31 32 const ( 33 // SplitOptimally indicates to make as few split as possible. 34 // 35 // Some queries may end up using "OR" filters, but no more than one such 36 // filter per query. Such queries are still accepted by the datastore. 37 SplitOptimally SplitMode = 0 38 39 // SplitCompletely indicates to split a filter into elementary filters. 40 // 41 // Elementary filters do not have "OR" in them. This is used in testing to 42 // cover code paths that merge results of multiple queries. This is needed 43 // because the local testing environment current (as of Jan 2024) doesn't 44 // actually support OR queries at all. 45 SplitCompletely SplitMode = 1 46 ) 47 48 // Filter represents a filter over the space of ["key:value"] tags. 49 // 50 // Conceptually it is a list of AND'ed together checks on values of tags. Each 51 // such check compares each value of some particular tag to a set of allowed 52 // values (often just one). The same tag key is allowed to show up more than 53 // once. In that case there will be more than one filter on values of this tag 54 // (see the example below). 55 // 56 // In API this filter is encoded by a list of `key:val1|val2|val3` pairs, where 57 // keys are allowed to be repeated. 58 // 59 // For example, this filter: 60 // 61 // ["os:Linux", "os:Ubuntu", "zone:us-central|us-east"] 62 // 63 // Will match entities with following tags: 64 // 65 // ["os:Linux", "os:Ubuntu", "os:Ubuntu-20", "zone:us-central"] 66 // ["os:Linux", "os:Ubuntu", "os:Ubuntu-22", "zone:us-easy"] 67 // 68 // But it will not match these entities: 69 // 70 // ["os:Linux", "os:Debian", "zone:us-central"] 71 // ["os:Linux", "os:Ubuntu", "os:Ubuntu-22", "zone:us-west"] 72 type Filter struct { 73 filters []perKeyFilter // sorted by key 74 } 75 76 // perKeyFilter is a filter that checks the value of a single tag key. 77 type perKeyFilter struct { 78 key string // the tag key to check 79 values []string // allowed values (no dups, sorted) 80 } 81 82 // NewFilter parses a list of `("key", "val1|val2|val2")` pairs. 83 // 84 // Empty filter is possible (if `tags` are empty). 85 func NewFilter(tags []*apipb.StringPair) (Filter, error) { 86 filter := Filter{ 87 filters: make([]perKeyFilter, 0, len(tags)), 88 } 89 90 for _, tag := range tags { 91 if strings.TrimSpace(tag.Key) != tag.Key || tag.Key == "" { 92 return filter, errors.Reason("bad key %q", tag.Key).Err() 93 } 94 95 vals := strings.Split(tag.Value, "|") 96 deduped := stringset.New(len(vals)) 97 for _, val := range vals { 98 if strings.TrimSpace(val) != val || val == "" { 99 return filter, errors.Reason("bad value for key %q: %q", tag.Key, tag.Value).Err() 100 } 101 deduped.Add(val) 102 } 103 104 filter.filters = append(filter.filters, perKeyFilter{ 105 key: tag.Key, 106 values: deduped.ToSortedSlice(), 107 }) 108 } 109 110 sort.SliceStable(filter.filters, func(i, j int) bool { 111 return filter.filters[i].key < filter.filters[j].key 112 }) 113 114 return filter, nil 115 } 116 117 // Pools is a list of all pools mentioned in the filter (if any). 118 func (f Filter) Pools() []string { 119 pools := stringset.New(1) // there's usually only 1 pool 120 for _, f := range f.filters { 121 if f.key == "pool" { 122 pools.AddAll(f.values) 123 } 124 } 125 return pools.ToSortedSlice() 126 } 127 128 // IsEmpty is true if this filter doesn't filter anything. 129 func (f Filter) IsEmpty() bool { 130 return len(f.filters) == 0 131 } 132 133 // SplitForQuery splits this filter into several simpler filters that can be 134 // used in datastore queries, with their results merged. 135 // 136 // The unsplit filter is generally too complex for the datastore query planner 137 // to handle using existing indexes (e.g. an index on `dimensions_flat` and 138 // a composite index on `(dimensions_flat, composite)` pair when used for 139 // BotInfo queries). 140 // 141 // Unfortunately due to datastore limits we can't just add all necessary 142 // composite indexes (like `(dimensions_flat, dimensions_flat, composite)` one). 143 // Since `dimensions_flat` is a repeated property, this results in too many 144 // indexed permutations of values, blowing up this index. Possible workarounds 145 // require changing the layout of BotInfo entities in datastore, but that would 146 // require imposing limits on public Swarming API (basically, we'll need to 147 // predefine what dimension keys are worth indexing and what are not; currently 148 // all are indexed). 149 // 150 // Instead we split the query into N subqueries, run them in parallel and merge 151 // results locally. This is relatively expensive and scales poorly, but we need 152 // to do that only for complex queries that use multiple OR property filters. 153 // They are relatively rare. 154 // 155 // If the original filter is empty, returns one empty filter as the output. 156 func (f Filter) SplitForQuery(mode SplitMode) []Filter { 157 // Count how many OR-ed property filters we have, find the smallest one. We'll 158 // use it as a "pivot" for splitting the original filter into smaller filters. 159 // That way we'll have the smallest number of splits. 160 multiValCount := 0 161 pivotIdx := 0 162 for idx, filter := range f.filters { 163 if vals := len(filter.values); vals > 1 { 164 multiValCount += 1 165 if multiValCount == 1 || vals < len(f.filters[pivotIdx].values) { 166 pivotIdx = idx 167 } 168 } 169 } 170 171 var maxMultiVal int 172 switch mode { 173 case SplitOptimally: 174 maxMultiVal = 1 // support at most one OR property filter 175 case SplitCompletely: 176 maxMultiVal = 0 // support no OR property filters at all 177 default: 178 panic(fmt.Sprintf("unknown split mode %d", mode)) 179 } 180 if multiValCount <= maxMultiVal { 181 return []Filter{f} 182 } 183 184 // Split into simpler filters around the pivot eliminating this particular OR. 185 // Keep simplifying the result recursively until we get a list of filters 186 // where each one can be handled by the datastore natively. 187 pivotVals := f.filters[pivotIdx].values 188 simplified := make([]Filter, 0, len(pivotVals)) 189 for _, pivotVal := range pivotVals { 190 subfilter := Filter{ 191 filters: make([]perKeyFilter, 0, len(f.filters)), 192 } 193 for idx, filter := range f.filters { 194 if idx == pivotIdx { 195 // Pivot! Pivot! 196 subfilter.filters = append(subfilter.filters, perKeyFilter{ 197 key: filter.key, 198 values: []string{pivotVal}, 199 }) 200 } else { 201 subfilter.filters = append(subfilter.filters, filter) 202 } 203 } 204 simplified = append(simplified, subfilter.SplitForQuery(mode)...) 205 } 206 207 return simplified 208 } 209 210 // Apply applies this filter to a query, returning (potentially) multiple 211 // queries. 212 // 213 // Results of these queries must be merged locally (e.g. via datastore.RunMulti) 214 // to get the final filtered result. 215 // 216 // `field` is the datastore entity field to apply the filter on. It should be 217 // a multi-valued field with values of form "key:value". 218 // 219 // If the filter is empty, returns a list with the original query as is. 220 func (f Filter) Apply(q *datastore.Query, field string, mode SplitMode) []*datastore.Query { 221 split := f.SplitForQuery(mode) 222 out := make([]*datastore.Query, 0, len(split)) 223 for _, simpleFilter := range split { 224 simpleQ := q 225 for _, f := range simpleFilter.filters { 226 if len(f.values) == 1 { 227 simpleQ = simpleQ.Eq(field, fmt.Sprintf("%s:%s", f.key, f.values[0])) 228 } else { 229 pairs := make([]any, len(f.values)) 230 for i, v := range f.values { 231 pairs[i] = fmt.Sprintf("%s:%s", f.key, v) 232 } 233 simpleQ = simpleQ.In(field, pairs...) 234 } 235 } 236 out = append(out, simpleQ) 237 } 238 return out 239 }