go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/proto/config/pools.proto (about)

     1  // Copyright 2017 The LUCI Authors. All rights reserved.
     2  // Use of this source code is governed under the Apache License, Version 2.0
     3  // that can be found in the LICENSE file.
     4  
     5  syntax = "proto3";
     6  
     7  package swarming.config;
     8  
     9  import "go.chromium.org/luci/swarming/proto/config/config.proto";
    10  import "go.chromium.org/luci/swarming/proto/config/realms.proto";
    11  
    12  option go_package = "go.chromium.org/luci/swarming/proto/config;configpb";
    13  
    14  import "go.chromium.org/luci/common/proto/options.proto";
    15  
    16  option (luci.file_metadata) = {
    17    doc_url: "https://config.luci.app/schemas/services/swarming:pools.cfg";
    18  };
    19  
    20  // Schema for pools.cfg service config file in luci-config.
    21  //
    22  // It defines a set of Pool objects, each one corresponding to a single Swarming
    23  // pool dimension. Each Swarming task resides in some pool, and each Swarming
    24  // bot belongs to at least one pool.
    25  //
    26  // Pools are used to isolate groups of tasks/bots from each other for security
    27  // and capacity reasons. Two different pools should not interfere with each
    28  // other at all (unless explicitly configured to share bots or accounts).
    29  message PoolsCfg {
    30    reserved 2;  // Retired field number; must not be reused.
    31  
    32    // List of all defined pools.
    33    repeated Pool pool = 1;
    34  
    35    // Configures the default external services (CIPD; the isolate entry is no
    36    // longer used) for all pools on this server.
    37    ExternalServices default_external_services = 6;
    38  
    39    // This is the "shared namespace" of task templates.
    40    //
    41    // Task templates allow pools to specify some property defaults (particularly
    42    // around caches, CIPD packages and environment variables) for tasks created
    43    // within the pool. These templates can have 'include' statements, and those
    44    // include statements draw from this namespace.
    45    //
    46    // Swarming will do a 2-pass parse of these so order doesn't matter (i.e. if
    47    // A includes B, but they are defined B-then-A, it's not an error).
    48    repeated TaskTemplate task_template = 3;
    49  
    50    // This is the "shared namespace" of deployments.
    51    //
    52    // When pools specify a task_template_deployment, it draws from this
    53    // namespace.
    54    repeated TaskTemplateDeployment task_template_deployment = 4;
    55  
    56    // Defines how to monitor bots in a pool. Each pool above may refer to
    57    // one of the BotMonitoring messages by name, which permits reusing
    58    // BotMonitoring definitions.
    59    repeated BotMonitoring bot_monitoring = 5;
    60  }
    61  
    62  
    63  // Properties of a single pool or a bunch of identically configured pools.
    64  //
    65  // In particular, it contains authorization configuration.
    66  message Pool {
    67    // Names of the pools this config applies to.
    68    //
    69    // Tasks target the pool by specifying its name as 'pool' dimension, thus
    70    // names here should be valid dimension values.
    71    repeated string name = 1;
    72  
    73    // Contact information for people that own this pool.
    74    //
    75    // Not used in any ACLs; this is just an informational field.
    76    repeated string owners = 2;
    77  
    78    // Defines who can schedule tasks in this pool.
    79    //
    80    // The checks here act as a second authorization layer, consulted after the
    81    // first server-global one (defined based on groups set in settings.cfg, see
    82    // AuthSettings in config.proto).
    83    Schedulers schedulers = 3;
    84  
    85    reserved 4, 5;  // Retired field numbers; must not be reused.
    86    reserved "allowed_service_account", "allowed_service_account_group";
    87  
    88    oneof task_deployment_scheme {  // At most one of the members may be set.
    89      // Most Pools will include a task_template_deployment by name.
    90      string task_template_deployment = 6;
    91  
    92      // However, pools which substantially differ from other ones can define an
    93      // entire deployment inline without putting it in the shared namespace.
    94      //
    95      // The name fields in this deployment and any embedded task_templates must
    96      // not be specified.
    97      TaskTemplateDeployment task_template_deployment_inline = 7;
    98    }
    99  
   100    // Refers to one file-level bot_monitoring entry (in PoolsCfg) by name.
   101    string bot_monitoring = 8;
   102  
   103    // If specified, this is the description of the external schedulers to be used
   104    // for tasks and bots for this pool that match the dimension set of a scheduler.
   105    // For a given task or bot, the first entry in this list that matches based on
   106    // dimension eligibility will be used.
   107    repeated ExternalSchedulerConfig external_schedulers = 9;
   108  
   109    // Realm name that the pool is associated with.
   110    //
   111    // e.g.
   112    // 'infra:pool/flex/try' for 'luci.flex.try' pool
   113    //
   114    // See also
   115    // https://chromium.googlesource.com/infra/luci/luci-go/+/HEAD/server/auth/service/protocol/components/auth/proto/realms.proto
   116    string realm = 10;
   117  
   118    // Enforcement of permissions can be controlled per pool during migration
   119    // from legacy ACLs configs to Realms configs.
   120    //
   121    // When scheduling tasks:
   122    //   * If a task doesn't have a realm (i.e. it is a legacy task), it will be
   123    //     assigned `default_task_realm` and only permissions listed here will be
   124    //     enforced. If some permission is not enforced, Swarming will use a
   125    //     legacy ACL check for it instead.
   126    //   * If a task has a realm (i.e. it is a modern task aware of realms), all
   127    //     permissions will always be enforced for it. Legacy ACLs will not be
   128    //     used at all.
   129    //
   130    // This field is not used for permissions not related to task scheduling.
   131    //
   132    // This field will be deprecated after migration. All scheduling permissions
   133    // will be enforced at all times.
   134    repeated RealmPermission enforced_realm_permissions = 11;
   135  
   136    // Realm name to use for tasks if they don't have a realm associated.
   137    string default_task_realm = 12;
   138  
   139    // Settings controlling migration to the RBE Scheduler.
   140    message RBEMigration {
   141      // An RBE instance to send tasks to.
   142      string rbe_instance = 1;
   143      // Approximate percent of tasks targeting this pool to send to RBE.
   144      //
   145      // The decision is made randomly when the task is scheduled.
   146      //
   147      // Additionally tasks that have `rbe:require` tag will always use RBE and
   148      // tasks that have `rbe:prevent` tag will never use RBE. If both tags are
   149      // set, `rbe:prevent` takes precedence.
   150      int32 rbe_mode_percent = 2;
   151      // Distribution of bots in this pool across migration modes. Percents must
   152      // sum up to 100.
   153      //
   154      // Bots are assigned the corresponding mode based on hash of their ID modulo
   155      // 100. The full space of such IDs always looks like this:
   156      //
   157      //    [---SWARMING---|---HYBRID---|---RBE---]
   158      //
   159      // Where the widths of the sections are defined by `percent` below. In other
   160      // words, there are two boundaries that can be moved:
   161      //   1. SWARMING <-> HYBRID.
   162      //   2. HYBRID <-> RBE.
   163      //
   164      // Migration starts with all bots being SWARMING, then SWARMING <-> HYBRID
   165      // boundary is moved until all bots are HYBRID. Then HYBRID <-> RBE boundary
   166      // is moved until all bots are RBE.
   167      //
   168      // If a bot belongs to multiple pools (should be rare), the mode is derived
   169      // to be compatible across all bot's pools:
   170      //   1. All pools indicate the bot should be in SWARMING => use SWARMING.
   171      //   2. All pools indicate the bot should be in RBE => use RBE.
   172      //   3. Otherwise use HYBRID.
   173      message BotModeAllocation {
   174        enum BotMode {
   175          UNKNOWN = 0;  // Zero (default) value.
   176          SWARMING = 1;
   177          HYBRID = 2;
   178          RBE = 3;
   179        }
   180        BotMode mode = 1;  // Mode assigned to bots falling into this section.
   181        int32 percent = 2;  // Width of this section; percents must sum up to 100.
   182      }
   183      repeated BotModeAllocation bot_mode_allocation = 3;
   184    }
   185    RBEMigration rbe_migration = 13;  // See RBEMigration above.
   186  
   187    // Controls the scheduling algorithm used by Swarming to schedule tasks.
   188    // _gen_queue_number() in server/task_to_run.py uses this to control the
   189    // queue_number of a task, which the ordering of tasks to run is based on.
   190    enum SchedulingAlgorithm {
   191      // Unknown or unspecified scheduling algorithm.
   192      SCHEDULING_ALGORITHM_UNKNOWN = 0;
   193      // First in first out (FIFO) scheduling algorithm.
   194      //
   195      // First task that comes in is scheduled first.
   196      SCHEDULING_ALGORITHM_FIFO = 1;
   197      // Last in first out (LIFO) scheduling algorithm.
   198      //
   199      // Last task that comes in is scheduled first.
   200      SCHEDULING_ALGORITHM_LIFO = 2;
   201    }
   202    SchedulingAlgorithm scheduling_algorithm = 14;  // See SchedulingAlgorithm.
   203  }
   204  
   205  
   206  // Defines who can schedule tasks in a pool.
   207  message Schedulers {
   208    // Emails of individual end-users.
   209    //
   210    // Useful to avoid creating one-person groups.
   211    repeated string user = 1;
   212  
   213    // List of groups with end-users.
   214    repeated string group = 2;
   215  
   216    // Delegatees trusted to decide who can use the pool; see TrustedDelegation.
   217    repeated TrustedDelegation trusted_delegation = 3;
   218  }
   219  
   220  
   221  // Defines a delegatee trusted to make authorization decisions for who can use
   222  // a pool.
   223  //
   224  // This is based on LUCI delegation protocol. Imagine an end user U calling
   225  // Swarming through an intermediary service X. In this case U is a delegator and
   226  // X is a delegatee. When X calls Swarming, it makes an RPC to the token server
   227  // to make a delegation token that says "<X can call Swarming on behalf of U>".
   228  //
   229  // This token is then sent to Swarming with the RPC. Swarming sees that
   230  // the direct peer it's talking to is X, but the call should be performed under
   231  // the authority of U.
   232  //
   233  // We extend this to also allow X to make authorization decisions about whether
   234  // U can use a particular Swarming resource or not. The result of this decision
   235  // is encoded in the delegation token as a set of "key:value" tags. Swarming
   236  // can then treat presence of such tags as a signal that the particular call is
   237  // allowed.
   238  //
   239  // In this scenario we totally trust X to make the correct decision.
   240  message TrustedDelegation {
   241    message TagList {  // Wrapper around a list of delegation token tags.
   242      repeated string tag = 1;
   243    }
   244  
   245    // Email of a trusted delegatee (the one who's minting the delegation token).
   246    string peer_id = 1;
   247  
   248    // A list of tags expected in the delegation token to allow the usage of
   249    // a pool.
   250    //
   251    // Presence of any of the specified tags is enough. The format of these tags
   252    // generally depends on what service is doing the delegation.
   253    TagList require_any_of = 2;
   254  }
   255  
   256  
   257  // A TaskTemplate describes a set of properties (caches, CIPD packages and
   258  // envvars) which apply to tasks created within a swarming pool.
   259  //
   260  // TaskTemplates may either be defined inline inside of
   261  // a TaskTemplateDeployment, or in the "shared namespace" of the
   262  // PoolsCfg.task_template field.
   263  //
   264  // TaskTemplates may also include other TaskTemplates by name from the "shared
   265  // namespace" in PoolsCfg. Swarming calculates the final value for a given
   266  // TaskTemplate by applying all of its `include` fields depth-first, and then by
   267  // applying the properties in the body of the TaskTemplate. Includes may never
   268  // be repeated, including transitively. This means that "diamond shaped
   269  // dependencies" are forbidden (i.e. A<-B<-D and A<-C<-D would be forbidden
   270  // because `A` is included in `D` twice (via both C and B)).
   271  message TaskTemplate {
   272    // This gives the template a name for the 'include' field below. This only
   273    // applies to templates defined within the PoolsCfg message (i.e. the
   274    // top-level message), not to templates inlined into a TaskTemplateDeployment.
   275    string name = 1;
   276  
   277    // Includes properties from the named other TaskTemplate. This can only
   278    // include templates defined in the top-level PoolsCfg message.
   279    repeated string include = 2;
   280  
   281    message CacheEntry {
   282      // The name of the cache (required).
   283      string name = 1;
   284      // The path relative to the task root to mount the cache (required).
   285      string path = 2;
   286    }
   287    // CacheEntries are keyed by `name`, and `path` is overridden wholesale.
   288    //
   289    // It is illegal to have any TaskTemplate with multiple cache entries mapping
   290    // to the same path. It is illegal to have any cache paths overlap with cipd
   291    // package paths.
   292    repeated CacheEntry cache = 3;
   293  
   294    message CipdPackage {
   295      // The path relative to the task root to unpack the CIPD package. A blank
   296      // value is permitted and means 'the root directory of the task'.
   297      string path = 1;
   298      // The CIPD package name template to use (required).
   299      string pkg = 2;
   300      // The version of the CIPD package to use (required).
   301      string version = 3;
   302    }
   303    // CipdPackages are keyed by (path, pkg), and `version` is overridden
   304    // wholesale.
   305    //
   306    // It is illegal to have any cipd paths overlap with cache entry paths.
   307    repeated CipdPackage cipd_package = 4;
   308  
   309    message Env {
   310      // The envvar you want to set (required).
   311      string var = 1;
   312  
   313      // The envvar value you want to set. Any prefixes are prepended to this
   314      // value. If the value is unset, prefixes will be prepended to the bot's
   315      // current value of this envvar (see examples).
   316      string value = 2;
   317  
   318      // Paths relative to the task root to prepend to this envvar on the bot.
   319      // These will be resolved to absolute paths on the bot.
   320      repeated string prefix = 3;
   321  
   322      // If true, tasks setting this EnvVar can overwrite the value and/or the
   323      // prefix. Otherwise, tasks will not be permitted to set any env var or
   324      // env_prefix for this var.
   325      //
   326      // This should be True for envvars you expect tasks to extend, like $PATH.
   327      // Note that this only affects envvar manipulation at the Swarming API
   328      // level; once the task is running it can (of course) manipulate the env
   329      // however it wants.
   330      bool soft = 4;
   331    }
   332    // Env vars are keyed by the `var` field.
   333    //
   334    // `value` fields overwrite included values.
   335    // `soft` fields overwrite included values.
   336    // `prefix` fields append to included values. For example, doing:
   337    //
   338    //     {name: "1" env { var: "PATH" prefix: "a" }}
   339    //     {name: "2" env { var: "PATH" prefix: "b" }}
   340    //     {name: "3" include: "1" include: "2" }
   341    //
   342    //  Is equivalent to:
   343    //
   344    //     {name: "3" env { var: "PATH" prefix: "a" prefix: "b" }}
   345    //
   346    //
   347    // Full Example:
   348    //
   349    //   env {
   350    //     var: "PATH"
   351    //     value: "/disable_system_path"
   352    //     prefix: "a"
   353    //     prefix: "b"
   354    //     prefix: "c"
   355    //     soft: true
   356    //   }
   357    //   env {
   358    //     var: "OTHER"
   359    //     value: "1"
   360    //   }
   361    //   env {
   362    //     var: "PYTHONPATH"
   363    //     prefix: "a"
   364    //   }
   365    //
   366    // Results in, essentially:
   367    //
   368    //   $PATH=/path/to/a:/path/to/b:/path/to/c:/disable_system_path
   369    //   $OTHER=1
   370    //   $PYTHONPATH=/path/to/a:$PYTHONPATH
   371    repeated Env env = 5;
   372  }
   373  
   374  
   375  // This is a tuple of (prod template, canary template, canary_chance), so that it
   376  // can be referenced from multiple pools simultaneously as a single unit.
   377  message TaskTemplateDeployment {
   378    // This gives the deployment a name for the 'task_template_deployment' field
   379    // in Pool.
   380    //
   381    // When this TaskTemplateDeployment is inlined into another message (e.g.
   382    // `Pool.task_template_deployment_inline`), the name field must not be
   383    // specified.
   384    string name = 1;
   385  
   386    // Most Deployments will have a TaskTemplate with just a single include
   387    // directive.
   388    //
   389    // However, pools which substantially differ from other ones could define an
   390    // entire template inline without being forced to put it in the shared
   391    // namespace.
   392    //
   393    // The name field in this template (and the canary template) must not be
   394    // specified.
   395    TaskTemplate prod = 2;
   396  
   397    // The canary template can be defined like the `prod` field above. If this is
   398    // defined and `canary_chance` is greater than 0, then this template may be
   399    // selected instead of `prod`, with the probability set by `canary_chance`.
   400    TaskTemplate canary = 3;
   401  
   402    // Range [0, 9999] where each tick corresponds to a 0.01% chance of selecting
   403    // the canary template. Exactly 0 means 'canary is disabled', meaning that
   404    // tasks in this pool will always get the prod template.
   405    //
   406    // Examples:
   407    //   * 1     ".01% chance of picking canary"
   408    //   * 10    ".1% chance of picking canary"
   409    //   * 100   "1% chance of picking canary"
   410    //   * 1000  "10% chance of picking canary"
   411    //   * 5000  "50% chance of picking canary"
   412    //   * 7500  "75% chance of picking canary"
   413    //   * 9999  "99.99% chance of picking canary"
   414    int32 canary_chance = 4;
   415  }
   416  
   417  
   418  // Defines how to monitor bots.
   419  message BotMonitoring {
   420    // Name is used by Pool to describe how to monitor bots in this pool.
   421    string name = 1;
   422    // Dimension keys to be used to bucket the bots.
   423    //
   424    // The algorithm for a key with multiple values is:
   425    //   def simplify(values):
   426    //     values = sorted(values)
   427    //     return '|'.join(
   428    //         v for i, v in enumerate(values)
   429    //         if not any(other.startswith(v) for other in values[i+1:]))
   430    //
   431    // For example, if 'os' is specified and a bot has the values
   432    // ['Linux', 'Ubuntu', 'Ubuntu-16.04'], the bucket value used for this bot
   433    // will be 'Linux|Ubuntu-16.04'.
   434    //
   435    // The whole algorithm then works for each key:
   436    //   def bucket(keys, bot_dimensions):
   437    //     return ';'.join(
   438    //         '%s:%s' % (key, simplify(bot_dimensions.get(key, [])))
   439    //         for key in keys)
   440    //
   441    // so the end result may look like: 'os:Linux|Ubuntu-16.04;pool:Testers'.
   442    //
   443    // More precisely, when this is used, the other bot dimensions are ignored.
   444    // 'pool' is always implicit.
   445    repeated string dimension_key = 2;
   446  }
   447  
   448  // Describes an external scheduler used by a particular swarming pool and
   449  // dimension set, via the external scheduler API.
   450  message ExternalSchedulerConfig {
   451    // Service address of external scheduler.
   452    string address = 1;
   453  
   454    // Scheduler id within the external scheduler service to use. This value
   455    // is opaque to swarming.
   456    string id = 2;
   457  
   458    // Dimensions is a list of dimension strings in "key:value" format (e.g.
   459    // ["os:foo", "featureX:bar"]) that determines eligibility for a bot or task
   460    // to use this external scheduler. In particular:
   461    // - a bot will be eligible if it contains all of these dimensions.
   462    // - a task will be eligible if all its slices contain all these dimensions.
   463    //
   464    // Note of care: if this list is empty, that means all requests in the pool
   465    // are eligible to be forwarded to it.
   466    //
   467    // Note: To be deprecated, in favor of: any_dimensions and all_dimensions.
   468    repeated string dimensions = 3;
   469  
   470    // If not enabled, this external scheduler config will be ignored. This
   471    // makes it safer to add new configs (the naive approach of adding a config
   472    // with an empty dimensions list would cause all requests to be routed to
   473    // that config).
   474    bool enabled = 4;
   475  
   476    bool fallback_when_empty = 5 [deprecated=true];  // Deprecated; do not set.
   477  
   478    // A task or bot must have all of these dimensions in order to match this
   479    // dimension set.
   480    //
   481    // Note: Support not yet implemented.
   482    repeated string all_dimensions = 6;
   483  
   484    // If any_dimensions is defined, a task or bot must have any of these
   485    // dimensions in order to match this dimension set.
   486    //
   487    // Note: Support not yet implemented.
   488    repeated string any_dimensions = 7;
   489  
   490    // If true, allows the swarming native scheduler to reap tasks that would
   491    // otherwise be owned by this external scheduler, if the external scheduler
   492    // returns no results.
   493    //
   494    // This field should be enabled temporarily when first turning on a new
   495    // external scheduler config, to allow tasks that existed prior to that time
   496    // to still have a chance to run. After prior tasks have aged out of the
   497    // system, this flag should be disabled, to get stricter consistency between
   498    // swarming's state and external scheduler's state.
   499    bool allow_es_fallback = 8;
   500  }
   501  
   502  message ExternalServices {  // External services config; only CIPD remains.
   503    // (deprecated) isolate field is not used.
   504    reserved 1;
   505  
   506    message CIPD {
   507      reserved 2; // Used to be "client_version", no longer used.
   508      reserved "client_version";
   509  
   510      // (required) URL of the default CIPD server to use, if it is not specified
   511      // in the task.
   512      //
   513      // Must start with "https://" or "http://".
   514      //
   515      // e.g. "https://chrome-infra-packages.appspot.com"
   516      string server = 1;
   517  
   518      // (required) The version of the cipd client to use. This is likely the
   519      // 'infra/tools/cipd/${platform}' package, but because it's part of the
   520      // bootstrap it needs special logic to handle its installation.
   521      CipdPackage client_package = 3;
   522    }
   523    CIPD cipd = 2;  // CIPD server and client package settings; see CIPD above.
   524  }