go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/cfg/config.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cfg
    16  
    17  import (
    18  	"context"
    19  	"crypto/sha256"
    20  	"encoding/base64"
    21  	"fmt"
    22  	"regexp"
    23  	"sort"
    24  	"time"
    25  
    26  	"google.golang.org/protobuf/encoding/prototext"
    27  	"google.golang.org/protobuf/proto"
    28  
    29  	"go.chromium.org/luci/common/clock"
    30  	"go.chromium.org/luci/common/errors"
    31  	"go.chromium.org/luci/common/logging"
    32  	"go.chromium.org/luci/config"
    33  	"go.chromium.org/luci/config/cfgclient"
    34  	"go.chromium.org/luci/config/validation"
    35  	"go.chromium.org/luci/gae/service/datastore"
    36  
    37  	configpb "go.chromium.org/luci/swarming/proto/config"
    38  	"go.chromium.org/luci/swarming/server/cfg/internalcfgpb"
    39  )
    40  
// Individually recognized config files.
//
// These are the paths requested by name when fetching configs from LUCI
// Config; each one is parsed as a text proto into the noted message type.
const (
	settingsCfg = "settings.cfg" // parsed into configpb.SettingsCfg
	poolsCfg    = "pools.cfg"    // parsed into configpb.PoolsCfg
	botsCfg     = "bots.cfg"     // parsed into configpb.BotsCfg
)
    47  
    48  // hookScriptRe matches paths like `scripts/hooks.py`. It intentionally doesn't
    49  // match subdirectories of `scripts/` since they contain unit tests the server
    50  // doesn't care about.
    51  var hookScriptRe = regexp.MustCompile(`scripts/[^/]+\.py`)
    52  
const (
	// A pseudo-revision of an empty config.
	//
	// It is used as the revision when LUCI Config has no configuration files
	// and when there's no config entity in the datastore yet.
	emptyRev = "0000000000000000000000000000000000000000"
	// A digest of a default config (calculated in the test).
	//
	// It is paired with emptyRev wherever the default config is substituted for
	// a missing one.
	emptyDigest = "0NpkIis/WMci8PDKkLD3PB/t8B86nbBVjyD59iosjOM"
)
    59  
// Config is an immutable queryable representation of Swarming server configs.
//
// It is a snapshot of configs at some particular revision. Use an instance of
// Provider to get it.
type Config struct {
	// Revision is the config repo commit the config was loaded from.
	Revision string
	// Digest is derived exclusively from the configs content.
	Digest string
	// Fetched is when the stored config was fetched from LUCI Config.
	Fetched time.Time
	// Refreshed is when the process config was fetched from the datastore.
	Refreshed time.Time

	// settings is the settings proto with defaults filled in.
	settings  *configpb.SettingsCfg
	poolMap   map[string]*Pool // pool name => config
	poolNames []string         // sorted list of pool names
	botGroups *botGroups       // can map bot ID to a bot group config
}
    79  
// Settings returns the settings proto with defaults filled in.
func (cfg *Config) Settings() *configpb.SettingsCfg {
	return cfg.settings
}
    84  
    85  // Pool returns a config for the given pool or nil if there's no such pool.
    86  func (cfg *Config) Pool(name string) *Pool {
    87  	return cfg.poolMap[name]
    88  }
    89  
// Pools returns a sorted list of all known pools.
//
// The returned slice is the one held by the Config (it is not copied), so
// callers must not modify it.
func (cfg *Config) Pools() []string {
	return cfg.poolNames
}
    94  
    95  // BotGroup returns a BotGroup config matching the given bot ID.
    96  //
    97  // Understands composite bot IDs, see HostBotID(...). Always returns some
    98  // config (never nil). If there's no config assigned to a bot, returns a default
    99  // config.
   100  func (cfg *Config) BotGroup(botID string) *BotGroup {
   101  	hostID := HostBotID(botID)
   102  
   103  	// If this is a composite bot ID, try to find if there's a config for this
   104  	// *specific* composite ID first. This acts as an override if we need to
   105  	// single-out a bot that uses a concrete composite IDs.
   106  	if hostID != botID {
   107  		if group := cfg.botGroups.directMatches[botID]; group != nil {
   108  			return group
   109  		}
   110  	}
   111  
   112  	// Otherwise look it up based on the host ID (which is the same as bot ID
   113  	// for non-composite IDs).
   114  	if group := cfg.botGroups.directMatches[hostID]; group != nil {
   115  		return group
   116  	}
   117  	if _, group, ok := cfg.botGroups.prefixMatches.LongestPrefix(hostID); ok {
   118  		return group.(*BotGroup)
   119  	}
   120  	return cfg.botGroups.defaultGroup
   121  }
   122  
   123  // UpdateConfigs fetches the most recent server configs from LUCI Config and
   124  // stores them in the local datastore if they appear to be valid.
   125  //
   126  // Called from a cron job once a minute.
   127  func UpdateConfigs(ctx context.Context) error {
   128  	// Fetch known config files and everything that looks like a hooks script.
   129  	files, err := cfgclient.Client(ctx).GetConfigs(ctx, "services/${appid}",
   130  		func(path string) bool {
   131  			return path == settingsCfg || path == poolsCfg || path == botsCfg || hookScriptRe.MatchString(path)
   132  		}, false)
   133  	if err != nil && !errors.Is(err, config.ErrNoConfig) {
   134  		return errors.Annotate(err, "failed to fetch the most recent configs from LUCI Config").Err()
   135  	}
   136  
   137  	// Get the revision, to check if we have seen it already.
   138  	var revision string
   139  	if len(files) == 0 {
   140  		// This can happen in new deployments.
   141  		logging.Warningf(ctx, "There are no configuration files in LUCI Config")
   142  		revision = emptyRev
   143  	} else {
   144  		// Per GetConfigs logic, all files are at the same revision. Pick the first.
   145  		for _, cfg := range files {
   146  			revision = cfg.Revision
   147  			break
   148  		}
   149  	}
   150  
   151  	// No need to do anything if already processed this revision.
   152  	lastRev := &configBundleRev{Key: configBundleRevKey(ctx)}
   153  	switch err := datastore.Get(ctx, lastRev); {
   154  	case err == nil && lastRev.Revision == revision:
   155  		logging.Infof(ctx, "Configs are already up-to-date at rev %s (fetched %s ago)", lastRev.Revision, clock.Since(ctx, lastRev.Fetched).Round(time.Second))
   156  		return nil
   157  	case err == nil && lastRev.Revision != revision:
   158  		logging.Infof(ctx, "Configs revision changed %s => %s", lastRev.Revision, revision)
   159  	case errors.Is(err, datastore.ErrNoSuchEntity):
   160  		logging.Infof(ctx, "First config import ever at rev %s", revision)
   161  	case err != nil:
   162  		return errors.Annotate(err, "failed to fetch the latest processed revision from datastore").Err()
   163  	}
   164  
   165  	// Parse and re-validate.
   166  	bundle, err := parseAndValidateConfigs(ctx, revision, files)
   167  	if err != nil {
   168  		return errors.Annotate(err, "bad configs at rev %s", revision).Err()
   169  	}
   170  
   171  	// Store as the new authoritative config.
   172  	err = datastore.RunInTransaction(ctx, func(ctx context.Context) error {
   173  		now := clock.Now(ctx).UTC()
   174  		return datastore.Put(ctx,
   175  			&configBundle{
   176  				Key:      configBundleKey(ctx),
   177  				Revision: bundle.Revision,
   178  				Digest:   bundle.Digest,
   179  				Fetched:  now,
   180  				Bundle:   bundle,
   181  			},
   182  			&configBundleRev{
   183  				Key:      configBundleRevKey(ctx),
   184  				Revision: bundle.Revision,
   185  				Digest:   bundle.Digest,
   186  				Fetched:  now,
   187  			})
   188  	}, nil)
   189  
   190  	if err != nil {
   191  		return errors.Annotate(err, "failed to store configs at rev %s in the datastore", bundle.Revision).Err()
   192  	}
   193  
   194  	logging.Infof(ctx, "Stored configs at rev %s (digest %s)", bundle.Revision, bundle.Digest)
   195  	return nil
   196  }
   197  
   198  // defaultConfigs returns default config protos used on an "empty" server.
   199  func defaultConfigs() *internalcfgpb.ConfigBundle {
   200  	return &internalcfgpb.ConfigBundle{
   201  		Revision: emptyRev,
   202  		Digest:   emptyDigest,
   203  		Settings: withDefaultSettings(&configpb.SettingsCfg{}),
   204  		Pools:    &configpb.PoolsCfg{},
   205  		Bots: &configpb.BotsCfg{
   206  			TrustedDimensions: []string{"pool"},
   207  			BotGroup: []*configpb.BotGroup{
   208  				{
   209  					Dimensions: []string{"pool:unassigned"},
   210  					Auth: []*configpb.BotAuth{
   211  						{RequireLuciMachineToken: true, LogIfFailed: true},
   212  					},
   213  				},
   214  			},
   215  		},
   216  		Scripts: map[string]string{},
   217  	}
   218  }
   219  
   220  // parseAndValidateConfigs parses config files fetched from LUCI Config, if any.
   221  func parseAndValidateConfigs(ctx context.Context, rev string, files map[string]config.Config) (*internalcfgpb.ConfigBundle, error) {
   222  	bundle := defaultConfigs()
   223  	bundle.Revision = rev
   224  
   225  	// Parse and validate files individually.
   226  	if err := parseAndValidate(ctx, files, settingsCfg, bundle.Settings, validateSettingsCfg); err != nil {
   227  		return nil, err
   228  	}
   229  	if err := parseAndValidate(ctx, files, poolsCfg, bundle.Pools, validatePoolsCfg); err != nil {
   230  		return nil, err
   231  	}
   232  	if err := parseAndValidate(ctx, files, botsCfg, bundle.Bots, validateBotsCfg); err != nil {
   233  		return nil, err
   234  	}
   235  
   236  	// Make sure all referenced hook scripts are actually present. Collect them.
   237  	for idx, group := range bundle.Bots.BotGroup {
   238  		if group.BotConfigScript == "" {
   239  			continue
   240  		}
   241  		if _, ok := bundle.Scripts[group.BotConfigScript]; ok {
   242  			continue
   243  		}
   244  		body, ok := files["scripts/"+group.BotConfigScript]
   245  		if !ok {
   246  			return nil, errors.Reason("bot group #%d refers to undefined bot config script %q", idx+1, group.BotConfigScript).Err()
   247  		}
   248  		bundle.Scripts[group.BotConfigScript] = body.Content
   249  	}
   250  
   251  	// Derive the digest based exclusively on configs content, regardless of the
   252  	// revision. The revision can change even if configs are unchanged. The digest
   253  	// is used to know when configs are changing **for real**.
   254  	visit := []string{settingsCfg, poolsCfg, botsCfg}
   255  	for script := range bundle.Scripts {
   256  		visit = append(visit, "scripts/"+script)
   257  	}
   258  	sort.Strings(visit)
   259  	h := sha256.New()
   260  	_, _ = fmt.Fprintf(h, "version 1\n")
   261  	for _, path := range visit {
   262  		_, _ = fmt.Fprintf(h, "%s\n%d\n%s\n", path, len(files[path].Content), files[path].Content)
   263  	}
   264  	bundle.Digest = base64.RawStdEncoding.EncodeToString(h.Sum(nil))
   265  
   266  	return bundle, nil
   267  }
   268  
   269  // parseAndValidate parses and validated one text proto config file.
   270  func parseAndValidate[T any, TP interface {
   271  	*T
   272  	proto.Message
   273  }](ctx context.Context,
   274  	files map[string]config.Config,
   275  	path string,
   276  	cfg *T,
   277  	validate func(ctx *validation.Context, t *T),
   278  ) error {
   279  	// Parse it if it is present. Otherwise use the default value of `cfg`.
   280  	if body := files[path]; body.Content != "" {
   281  		if err := prototext.Unmarshal([]byte(body.Content), TP(cfg)); err != nil {
   282  			return errors.Annotate(err, "%s", path).Err()
   283  		}
   284  	} else {
   285  		logging.Warningf(ctx, "There's no %s config, using default", path)
   286  	}
   287  	// Pass through the validation, abort on fatal errors, allow warnings.
   288  	valCtx := validation.Context{Context: ctx}
   289  	validate(&valCtx, cfg)
   290  	if err := valCtx.Finalize(); err != nil {
   291  		var valErr *validation.Error
   292  		if errors.As(err, &valErr) {
   293  			blocking := valErr.WithSeverity(validation.Blocking)
   294  			if blocking != nil {
   295  				return errors.Annotate(blocking, "%s", path).Err()
   296  			}
   297  		} else {
   298  			return errors.Annotate(err, "%s", path).Err()
   299  		}
   300  	}
   301  	return nil
   302  }
   303  
   304  ////////////////////////////////////////////////////////////////////////////////
   305  
// configBundle is an entity that stores latest configs as compressed protos.
//
// It is a singleton entity, see configBundleKey. It is written together with
// configBundleRev, which duplicates its Revision, Digest and Fetched fields.
type configBundle struct {
	// NOTE(review): presumably this tolerates properties unknown to this
	// struct when loading the entity - confirm against the datastore library.
	_ datastore.PropertyMap `gae:"-,extra"`

	Key      *datastore.Key              `gae:"$key"`
	Revision string                      `gae:",noindex"` // the config revision of the bundle
	Digest   string                      `gae:",noindex"` // the digest of the bundle's content
	Fetched  time.Time                   `gae:",noindex"` // when the bundle was stored
	Bundle   *internalcfgpb.ConfigBundle `gae:",zstd"`    // the parsed and validated configs
}
   316  
// configBundleRev just stores the metadata for faster fetches.
//
// It is a singleton entity, see configBundleRevKey. It carries the same
// Revision, Digest and Fetched values as configBundle, but not the configs
// themselves, and is used to check if the stored config has changed without
// fetching the whole bundle.
type configBundleRev struct {
	// NOTE(review): presumably this tolerates properties unknown to this
	// struct when loading the entity - confirm against the datastore library.
	_ datastore.PropertyMap `gae:"-,extra"`

	Key      *datastore.Key `gae:"$key"`
	Revision string         `gae:",noindex"`
	Digest   string         `gae:",noindex"`
	Fetched  time.Time      `gae:",noindex"`
}
   326  
   327  // configBundleKey is a key of the singleton configBundle entity.
   328  func configBundleKey(ctx context.Context) *datastore.Key {
   329  	return datastore.NewKey(ctx, "ConfigBundle", "", 1, nil)
   330  }
   331  
   332  // configBundleRevKey is a key of the singleton configBundleRev entity.
   333  func configBundleRevKey(ctx context.Context) *datastore.Key {
   334  	return datastore.NewKey(ctx, "ConfigBundleRev", "", 1, configBundleKey(ctx))
   335  }
   336  
   337  // fetchFromDatastore fetches the config from the datastore.
   338  //
   339  // If there's no config in the datastore, returns some default empty config.
   340  //
   341  // If `cur` is not nil its (immutable) parts may be used to construct the
   342  // new Config in case they didn't change.
   343  func fetchFromDatastore(ctx context.Context, cur *Config) (*Config, error) {
   344  	// If already have a config, check if we really need to reload it.
   345  	if cur != nil {
   346  		rev := &configBundleRev{Key: configBundleRevKey(ctx)}
   347  		switch err := datastore.Get(ctx, rev); {
   348  		case errors.Is(err, datastore.ErrNoSuchEntity):
   349  			rev.Revision = emptyRev
   350  			rev.Digest = emptyDigest
   351  		case err != nil:
   352  			return nil, errors.Annotate(err, "fetching configBundleRev").Err()
   353  		}
   354  		if cur.Digest == rev.Digest {
   355  			clone := *cur
   356  			clone.Revision = rev.Revision
   357  			clone.Fetched = rev.Fetched
   358  			clone.Refreshed = clock.Now(ctx).UTC()
   359  			return &clone, nil
   360  		}
   361  	}
   362  
   363  	// Either have no config or the one in the datastore is different. Get it.
   364  	bundle := &configBundle{Key: configBundleKey(ctx)}
   365  	switch err := datastore.Get(ctx, bundle); {
   366  	case errors.Is(err, datastore.ErrNoSuchEntity):
   367  		bundle.Revision = emptyRev
   368  		bundle.Digest = emptyDigest
   369  		bundle.Bundle = defaultConfigs()
   370  	case err != nil:
   371  		return nil, errors.Annotate(err, "fetching configBundle").Err()
   372  	}
   373  
   374  	// Transform config protos into data structures optimized for config queries.
   375  	// This should never really fail, since we store only validated configs. If
   376  	// this fails, the entire service will eventually go offline since new
   377  	// processes won't be able to load initial copy of the config (while old
   378  	// processes will keep using last known good copies, until eventually they
   379  	// all terminate).
   380  	cfg, err := buildQueriableConfig(ctx, bundle)
   381  	if err != nil {
   382  		logging.Errorf(ctx,
   383  			"Broken config in the datastore at rev %s (digest %s, fetched %s ago): %s",
   384  			bundle.Revision, bundle.Digest, clock.Since(ctx, bundle.Fetched), err,
   385  		)
   386  		return nil, errors.Annotate(err, "broken config in the datastore").Err()
   387  	}
   388  	logging.Infof(ctx, "Loaded configs at rev %s", cfg.Revision)
   389  	return cfg, nil
   390  }
   391  
   392  // buildQueriableConfig transforms config protos into data structures optimized
   393  // for config queries.
   394  func buildQueriableConfig(ctx context.Context, ent *configBundle) (*Config, error) {
   395  	pools, err := newPoolsConfig(ent.Bundle.Pools)
   396  	if err != nil {
   397  		return nil, errors.Annotate(err, "bad pools.cfg").Err()
   398  	}
   399  	poolNames := make([]string, 0, len(pools))
   400  	for name := range pools {
   401  		poolNames = append(poolNames, name)
   402  	}
   403  	sort.Strings(poolNames)
   404  
   405  	botGroups, err := newBotGroups(ent.Bundle.Bots)
   406  	if err != nil {
   407  		return nil, errors.Annotate(err, "bad bots.cfg").Err()
   408  	}
   409  
   410  	return &Config{
   411  		Revision:  ent.Revision,
   412  		Digest:    ent.Digest,
   413  		Fetched:   ent.Fetched,
   414  		Refreshed: clock.Now(ctx).UTC(),
   415  		settings:  withDefaultSettings(ent.Bundle.Settings),
   416  		poolMap:   pools,
   417  		poolNames: poolNames,
   418  		botGroups: botGroups,
   419  	}, nil
   420  }