go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/cfg/config.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cfg 16 17 import ( 18 "context" 19 "crypto/sha256" 20 "encoding/base64" 21 "fmt" 22 "regexp" 23 "sort" 24 "time" 25 26 "google.golang.org/protobuf/encoding/prototext" 27 "google.golang.org/protobuf/proto" 28 29 "go.chromium.org/luci/common/clock" 30 "go.chromium.org/luci/common/errors" 31 "go.chromium.org/luci/common/logging" 32 "go.chromium.org/luci/config" 33 "go.chromium.org/luci/config/cfgclient" 34 "go.chromium.org/luci/config/validation" 35 "go.chromium.org/luci/gae/service/datastore" 36 37 configpb "go.chromium.org/luci/swarming/proto/config" 38 "go.chromium.org/luci/swarming/server/cfg/internalcfgpb" 39 ) 40 41 // Individually recognized config files. 42 const ( 43 settingsCfg = "settings.cfg" 44 poolsCfg = "pools.cfg" 45 botsCfg = "bots.cfg" 46 ) 47 48 // hookScriptRe matches paths like `scripts/hooks.py`. It intentionally doesn't 49 // match subdirectories of `scripts/` since they contain unit tests the server 50 // doesn't care about. 51 var hookScriptRe = regexp.MustCompile(`scripts/[^/]+\.py`) 52 53 const ( 54 // A pseudo-revision of an empty config. 55 emptyRev = "0000000000000000000000000000000000000000" 56 // A digest of a default config (calculated in the test). 57 emptyDigest = "0NpkIis/WMci8PDKkLD3PB/t8B86nbBVjyD59iosjOM" 58 ) 59 60 // Config is an immutable queryable representation of Swarming server configs. 61 // 62 // It is a snapshot of configs at some particular revision. Use an instance of 63 // Provider to get it. 64 type Config struct { 65 // Revision is the config repo commit the config was loaded from. 66 Revision string 67 // Digest is derived exclusively from the configs content. 68 Digest string 69 // Fetched is when the stored config was fetched from LUIC Config. 70 Fetched time.Time 71 // Refreshed is when the process config was fetched from the datastore. 72 Refreshed time.Time 73 74 settings *configpb.SettingsCfg 75 poolMap map[string]*Pool // pool name => config 76 poolNames []string // sorted list of pool names 77 botGroups *botGroups // can map bot ID to a bot group config 78 } 79 80 // Settings are settings proto with defaults filled in. 81 func (cfg *Config) Settings() *configpb.SettingsCfg { 82 return cfg.settings 83 } 84 85 // Pool returns a config for the given pool or nil if there's no such pool. 86 func (cfg *Config) Pool(name string) *Pool { 87 return cfg.poolMap[name] 88 } 89 90 // Pools returns a sorted list of all known pools. 91 func (cfg *Config) Pools() []string { 92 return cfg.poolNames 93 } 94 95 // BotGroup returns a BotGroup config matching the given bot ID. 96 // 97 // Understands composite bot IDs, see HostBotID(...). Always returns some 98 // config (never nil). If there's no config assigned to a bot, returns a default 99 // config. 100 func (cfg *Config) BotGroup(botID string) *BotGroup { 101 hostID := HostBotID(botID) 102 103 // If this is a composite bot ID, try to find if there's a config for this 104 // *specific* composite ID first. This acts as an override if we need to 105 // single-out a bot that uses a concrete composite IDs. 106 if hostID != botID { 107 if group := cfg.botGroups.directMatches[botID]; group != nil { 108 return group 109 } 110 } 111 112 // Otherwise look it up based on the host ID (which is the same as bot ID 113 // for non-composite IDs). 114 if group := cfg.botGroups.directMatches[hostID]; group != nil { 115 return group 116 } 117 if _, group, ok := cfg.botGroups.prefixMatches.LongestPrefix(hostID); ok { 118 return group.(*BotGroup) 119 } 120 return cfg.botGroups.defaultGroup 121 } 122 123 // UpdateConfigs fetches the most recent server configs from LUCI Config and 124 // stores them in the local datastore if they appear to be valid. 125 // 126 // Called from a cron job once a minute. 127 func UpdateConfigs(ctx context.Context) error { 128 // Fetch known config files and everything that looks like a hooks script. 129 files, err := cfgclient.Client(ctx).GetConfigs(ctx, "services/${appid}", 130 func(path string) bool { 131 return path == settingsCfg || path == poolsCfg || path == botsCfg || hookScriptRe.MatchString(path) 132 }, false) 133 if err != nil && !errors.Is(err, config.ErrNoConfig) { 134 return errors.Annotate(err, "failed to fetch the most recent configs from LUCI Config").Err() 135 } 136 137 // Get the revision, to check if we have seen it already. 138 var revision string 139 if len(files) == 0 { 140 // This can happen in new deployments. 141 logging.Warningf(ctx, "There are no configuration files in LUCI Config") 142 revision = emptyRev 143 } else { 144 // Per GetConfigs logic, all files are at the same revision. Pick the first. 145 for _, cfg := range files { 146 revision = cfg.Revision 147 break 148 } 149 } 150 151 // No need to do anything if already processed this revision. 152 lastRev := &configBundleRev{Key: configBundleRevKey(ctx)} 153 switch err := datastore.Get(ctx, lastRev); { 154 case err == nil && lastRev.Revision == revision: 155 logging.Infof(ctx, "Configs are already up-to-date at rev %s (fetched %s ago)", lastRev.Revision, clock.Since(ctx, lastRev.Fetched).Round(time.Second)) 156 return nil 157 case err == nil && lastRev.Revision != revision: 158 logging.Infof(ctx, "Configs revision changed %s => %s", lastRev.Revision, revision) 159 case errors.Is(err, datastore.ErrNoSuchEntity): 160 logging.Infof(ctx, "First config import ever at rev %s", revision) 161 case err != nil: 162 return errors.Annotate(err, "failed to fetch the latest processed revision from datastore").Err() 163 } 164 165 // Parse and re-validate. 166 bundle, err := parseAndValidateConfigs(ctx, revision, files) 167 if err != nil { 168 return errors.Annotate(err, "bad configs at rev %s", revision).Err() 169 } 170 171 // Store as the new authoritative config. 172 err = datastore.RunInTransaction(ctx, func(ctx context.Context) error { 173 now := clock.Now(ctx).UTC() 174 return datastore.Put(ctx, 175 &configBundle{ 176 Key: configBundleKey(ctx), 177 Revision: bundle.Revision, 178 Digest: bundle.Digest, 179 Fetched: now, 180 Bundle: bundle, 181 }, 182 &configBundleRev{ 183 Key: configBundleRevKey(ctx), 184 Revision: bundle.Revision, 185 Digest: bundle.Digest, 186 Fetched: now, 187 }) 188 }, nil) 189 190 if err != nil { 191 return errors.Annotate(err, "failed to store configs at rev %s in the datastore", bundle.Revision).Err() 192 } 193 194 logging.Infof(ctx, "Stored configs at rev %s (digest %s)", bundle.Revision, bundle.Digest) 195 return nil 196 } 197 198 // defaultConfigs returns default config protos used on an "empty" server. 199 func defaultConfigs() *internalcfgpb.ConfigBundle { 200 return &internalcfgpb.ConfigBundle{ 201 Revision: emptyRev, 202 Digest: emptyDigest, 203 Settings: withDefaultSettings(&configpb.SettingsCfg{}), 204 Pools: &configpb.PoolsCfg{}, 205 Bots: &configpb.BotsCfg{ 206 TrustedDimensions: []string{"pool"}, 207 BotGroup: []*configpb.BotGroup{ 208 { 209 Dimensions: []string{"pool:unassigned"}, 210 Auth: []*configpb.BotAuth{ 211 {RequireLuciMachineToken: true, LogIfFailed: true}, 212 }, 213 }, 214 }, 215 }, 216 Scripts: map[string]string{}, 217 } 218 } 219 220 // parseAndValidateConfigs parses config files fetched from LUCI Config, if any. 221 func parseAndValidateConfigs(ctx context.Context, rev string, files map[string]config.Config) (*internalcfgpb.ConfigBundle, error) { 222 bundle := defaultConfigs() 223 bundle.Revision = rev 224 225 // Parse and validate files individually. 226 if err := parseAndValidate(ctx, files, settingsCfg, bundle.Settings, validateSettingsCfg); err != nil { 227 return nil, err 228 } 229 if err := parseAndValidate(ctx, files, poolsCfg, bundle.Pools, validatePoolsCfg); err != nil { 230 return nil, err 231 } 232 if err := parseAndValidate(ctx, files, botsCfg, bundle.Bots, validateBotsCfg); err != nil { 233 return nil, err 234 } 235 236 // Make sure all referenced hook scripts are actually present. Collect them. 237 for idx, group := range bundle.Bots.BotGroup { 238 if group.BotConfigScript == "" { 239 continue 240 } 241 if _, ok := bundle.Scripts[group.BotConfigScript]; ok { 242 continue 243 } 244 body, ok := files["scripts/"+group.BotConfigScript] 245 if !ok { 246 return nil, errors.Reason("bot group #%d refers to undefined bot config script %q", idx+1, group.BotConfigScript).Err() 247 } 248 bundle.Scripts[group.BotConfigScript] = body.Content 249 } 250 251 // Derive the digest based exclusively on configs content, regardless of the 252 // revision. The revision can change even if configs are unchanged. The digest 253 // is used to know when configs are changing **for real**. 254 visit := []string{settingsCfg, poolsCfg, botsCfg} 255 for script := range bundle.Scripts { 256 visit = append(visit, "scripts/"+script) 257 } 258 sort.Strings(visit) 259 h := sha256.New() 260 _, _ = fmt.Fprintf(h, "version 1\n") 261 for _, path := range visit { 262 _, _ = fmt.Fprintf(h, "%s\n%d\n%s\n", path, len(files[path].Content), files[path].Content) 263 } 264 bundle.Digest = base64.RawStdEncoding.EncodeToString(h.Sum(nil)) 265 266 return bundle, nil 267 } 268 269 // parseAndValidate parses and validated one text proto config file. 270 func parseAndValidate[T any, TP interface { 271 *T 272 proto.Message 273 }](ctx context.Context, 274 files map[string]config.Config, 275 path string, 276 cfg *T, 277 validate func(ctx *validation.Context, t *T), 278 ) error { 279 // Parse it if it is present. Otherwise use the default value of `cfg`. 280 if body := files[path]; body.Content != "" { 281 if err := prototext.Unmarshal([]byte(body.Content), TP(cfg)); err != nil { 282 return errors.Annotate(err, "%s", path).Err() 283 } 284 } else { 285 logging.Warningf(ctx, "There's no %s config, using default", path) 286 } 287 // Pass through the validation, abort on fatal errors, allow warnings. 288 valCtx := validation.Context{Context: ctx} 289 validate(&valCtx, cfg) 290 if err := valCtx.Finalize(); err != nil { 291 var valErr *validation.Error 292 if errors.As(err, &valErr) { 293 blocking := valErr.WithSeverity(validation.Blocking) 294 if blocking != nil { 295 return errors.Annotate(blocking, "%s", path).Err() 296 } 297 } else { 298 return errors.Annotate(err, "%s", path).Err() 299 } 300 } 301 return nil 302 } 303 304 //////////////////////////////////////////////////////////////////////////////// 305 306 // configBundle is an entity that stores latest configs as compressed protos. 307 type configBundle struct { 308 _ datastore.PropertyMap `gae:"-,extra"` 309 310 Key *datastore.Key `gae:"$key"` 311 Revision string `gae:",noindex"` 312 Digest string `gae:",noindex"` 313 Fetched time.Time `gae:",noindex"` 314 Bundle *internalcfgpb.ConfigBundle `gae:",zstd"` 315 } 316 317 // configBundleRev just stores the metadata for faster fetches. 318 type configBundleRev struct { 319 _ datastore.PropertyMap `gae:"-,extra"` 320 321 Key *datastore.Key `gae:"$key"` 322 Revision string `gae:",noindex"` 323 Digest string `gae:",noindex"` 324 Fetched time.Time `gae:",noindex"` 325 } 326 327 // configBundleKey is a key of the singleton configBundle entity. 328 func configBundleKey(ctx context.Context) *datastore.Key { 329 return datastore.NewKey(ctx, "ConfigBundle", "", 1, nil) 330 } 331 332 // configBundleRevKey is a key of the singleton configBundleRev entity. 333 func configBundleRevKey(ctx context.Context) *datastore.Key { 334 return datastore.NewKey(ctx, "ConfigBundleRev", "", 1, configBundleKey(ctx)) 335 } 336 337 // fetchFromDatastore fetches the config from the datastore. 338 // 339 // If there's no config in the datastore, returns some default empty config. 340 // 341 // If `cur` is not nil its (immutable) parts may be used to construct the 342 // new Config in case they didn't change. 343 func fetchFromDatastore(ctx context.Context, cur *Config) (*Config, error) { 344 // If already have a config, check if we really need to reload it. 345 if cur != nil { 346 rev := &configBundleRev{Key: configBundleRevKey(ctx)} 347 switch err := datastore.Get(ctx, rev); { 348 case errors.Is(err, datastore.ErrNoSuchEntity): 349 rev.Revision = emptyRev 350 rev.Digest = emptyDigest 351 case err != nil: 352 return nil, errors.Annotate(err, "fetching configBundleRev").Err() 353 } 354 if cur.Digest == rev.Digest { 355 clone := *cur 356 clone.Revision = rev.Revision 357 clone.Fetched = rev.Fetched 358 clone.Refreshed = clock.Now(ctx).UTC() 359 return &clone, nil 360 } 361 } 362 363 // Either have no config or the one in the datastore is different. Get it. 364 bundle := &configBundle{Key: configBundleKey(ctx)} 365 switch err := datastore.Get(ctx, bundle); { 366 case errors.Is(err, datastore.ErrNoSuchEntity): 367 bundle.Revision = emptyRev 368 bundle.Digest = emptyDigest 369 bundle.Bundle = defaultConfigs() 370 case err != nil: 371 return nil, errors.Annotate(err, "fetching configBundle").Err() 372 } 373 374 // Transform config protos into data structures optimized for config queries. 375 // This should never really fail, since we store only validated configs. If 376 // this fails, the entire service will eventually go offline since new 377 // processes won't be able to load initial copy of the config (while old 378 // processes will keep using last known good copies, until eventually they 379 // all terminate). 380 cfg, err := buildQueriableConfig(ctx, bundle) 381 if err != nil { 382 logging.Errorf(ctx, 383 "Broken config in the datastore at rev %s (digest %s, fetched %s ago): %s", 384 bundle.Revision, bundle.Digest, clock.Since(ctx, bundle.Fetched), err, 385 ) 386 return nil, errors.Annotate(err, "broken config in the datastore").Err() 387 } 388 logging.Infof(ctx, "Loaded configs at rev %s", cfg.Revision) 389 return cfg, nil 390 } 391 392 // buildQueriableConfig transforms config protos into data structures optimized 393 // for config queries. 394 func buildQueriableConfig(ctx context.Context, ent *configBundle) (*Config, error) { 395 pools, err := newPoolsConfig(ent.Bundle.Pools) 396 if err != nil { 397 return nil, errors.Annotate(err, "bad pools.cfg").Err() 398 } 399 poolNames := make([]string, 0, len(pools)) 400 for name := range pools { 401 poolNames = append(poolNames, name) 402 } 403 sort.Strings(poolNames) 404 405 botGroups, err := newBotGroups(ent.Bundle.Bots) 406 if err != nil { 407 return nil, errors.Annotate(err, "bad bots.cfg").Err() 408 } 409 410 return &Config{ 411 Revision: ent.Revision, 412 Digest: ent.Digest, 413 Fetched: ent.Fetched, 414 Refreshed: clock.Now(ctx).UTC(), 415 settings: withDefaultSettings(ent.Bundle.Settings), 416 poolMap: pools, 417 poolNames: poolNames, 418 botGroups: botGroups, 419 }, nil 420 }