go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/proto/api/swarming.proto

go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/proto/api/swarming.proto (about)

     1  // Copyright 2018 The LUCI Authors. All rights reserved.
     2  // Use of this source code is governed under the Apache License, Version 2.0
     3  // that can be found in the LICENSE file.
     4  
     5  // This proto tries to converge with
     6  // https://github.com/googleapis/googleapis/blob/master/google/devtools/remoteworkers/v1test2/
     7  // as much as it is sensible to (not much). It has several inherent divergences
     8  // as Swarming has a much wider use case and has a different fundamental model
     9  // for bot state. Swarming has the limitation of not supporting children
    10  // devices: as a single bot is a single execution unit, unlike RBE.
    11  
    12  syntax = "proto3";
    13  
    14  package swarming.v1;
    15  option go_package = "go.chromium.org/luci/swarming/proto/api;apipb";
    16  
    17  import "google/protobuf/duration.proto";
    18  import "google/protobuf/struct.proto";
    19  import "google/protobuf/timestamp.proto";
    20  
    21  
    22  // APIs.
    23  
    24  service BotAPI {
    25    // Events returns one page of events for one bot, most recent first.
    26    rpc Events(BotEventsRequest) returns (BotEventsResponse) {};
    27  
    28    // TODO(maruel): Finish implementation. https://crbug.com/913953
    29  }
    30  
    31  // Request for BotAPI.Events.
    32  message BotEventsRequest {
    33    // Required. ID of the bot whose events are requested.
    34    string bot_id = 1;
    35    // Optional. Use this field to specify the maximum number of results to be
    36    // returned by the server.
    37    //
    38    // The server may further constrain the maximum number of results returned in
    39    // a single page. If the page_size is 0, the server will decide the number of
    40    // results to be returned.
    41    int32 page_size = 2;
    42    // Optional. Use this field to request a specific page of the list results,
    43    // as given by next_page_token in the response to a previous call.
    44    //
    45    // When specified, page_size, start_time and end_time must exactly match the
    46    // previous call's arguments.
    47    string page_token = 3;
    48    // Optional. Earliest event time to return. Inclusive.
    49    //
    50    // If not specified, pagination is done until all events are returned.
    51    google.protobuf.Timestamp start_time = 4;
    52    // Optional. Latest event time to return. Exclusive.
    53    //
    54    // If not specified, defaults to the current time.
    55    google.protobuf.Timestamp end_time = 5;
    56  }
    57  
    58  // Response of BotAPI.Events.
    59  message BotEventsResponse {
    60    // Events in reverse chronological order: most recent first, going down to
    61    // older events.
    62    repeated BotEvent events = 1;
    63    // This field represents the pagination token to retrieve the next page of
    64    // results. If the value is "", there are no further results for the request.
    65    string next_page_token = 2;
    66  }
    67  
    68  
    69  // Common messages.
    70  
    71  // Represents one entry of a mapping of string to string.
    72  //
    73  // Like a map<key, value>, except that the encoding is deterministic.
    74  //
    75  // If the StringPair is itself repeated inside another message, the list
    76  // must be sorted by key and the keys must be unique.
    77  message StringPair {
    78    string key = 1;  // Key of the entry.
    79    string value = 2;  // Value associated with the key.
    80  }
    81  
    82  // Represents one entry of a mapping of string to a list of strings.
    83  //
    84  // Like a map<key, repeated values>, except that the encoding is
    85  // deterministic.
    86  //
    87  // If the StringListPair is itself repeated inside another message, the list
    88  // must be sorted by key and the keys must be unique.
    89  message StringListPair {
    90    string key = 1;  // Key of the entry.
    91    // All the values for this key. The values must be sorted. Human readable.
    92    //
    93    // This string should make sense to a user in the context of 'key'.
    94    repeated string values = 2;
    95  }
    96  
    97  
    98  // Bot description.
    99  
   100  // Bot describes a Swarming bot.
   101  //
   102  // Because a Swarming bot is a single execution unit unlike RBE, it doesn't have
   103  // a concept of owned device at the moment. This may change later.
   104  message Bot {
   105    // Bot ID. It must be unique across the Swarming fleet. Generally based on the
   106    // hostname where the bot runs, but that's not a requirement. Must be
   107    // predefined in bots.cfg.
   108    //
   109    // This value is also included in dimensions for the key 'id'.
   110    string bot_id = 1;
   111    // Bot session ID. An opaque value.
   112    //
   113    // There is one bot session ID per bot process ID on the host. When the bot
   114    // self-upgrades, it creates a new bot session ID.
   115    string session_id = 2; // Not used yet. https://crbug.com/786735
   116    // Pools that this bot belongs to. Normally assigned via bots.cfg. The
   117    // pools must be defined in pools.cfg.
   118    //
   119    // Normally a bot shall belong to a single pool, but belonging to multiple
   120    // pools is allowed. This is generally helpful for transitioning bots.
   121    //
   122    // This value is also included in dimensions for the key 'pool'.
   123    repeated string pools = 3;
   124  
   125    // Current bot status. A bot status is a state the bot stays in for a
   126    // certain amount of time.
   127    BotStatusType status = 4;
   128    // Supplemental information to describe the bot status. Human readable.
   129    //
   130    // See BotStatusType for the meaning of this string for each status.
   131    string status_msg = 5;
   132    // Current task being handled by the bot, if there is one.
   133    //
   134    // In Swarming, only a single task can be assigned to a bot at any given time.
   135    string current_task_id = 6;
   136  
   137    // Bot reported dimensions. dimensions is a {key: [values]} dictionary. This
   138    // can be used to declare the properties of the host or for the DUT (Device
   139    // Under Test) under control. This is used for task selection.
   140    //
   141    // In RBE, this is called Property. The difference is that RBE's Property is a
   142    // string:string flat dictionary, it doesn't allow repeated values.
   143    //
   144    // https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Detailed-Design.md#bot-dimensions
   145    //
   146    // dimensions MUST be sorted by keys, and each values list must be sorted.
   147    // Each dimension key must be unique.
   148    //
   149    // The values are effectively an OR, a task may match any of the values.
   150    //
   151    // Human readable.
   152    repeated StringListPair dimensions = 7;
   153  
   154    // Bot reported informational state. This can be used to describe the host,
   155    // the bot itself and the DUT (Device Under Test) under control as applicable.
   156    //
   157    // This is NOT used for task selection.
   158    BotInfo info = 8;
   159  }
   160  
   161  // BotStatusType is one of the states the bot can be in.
   162  //
   163  // A bot status implies being in this status for a certain amount of time, for
   164  // example a hook running for N seconds, contrary to BotEventType which is about
   165  // an event that doesn't have an inherent duration.
   166  //
   167  // Some values are more important than others. For example, if a bot is now
   168  // MISSING but used to be QUARANTINED, the value is still MISSING.
   169  enum BotStatusType {
   170    // Invalid bot status, do not use.
   171    BOT_STATUS_UNSPECIFIED = 0;
   172  
   173    // Bad states
   174  
   175    // The server detected that the bot is not pinging the server anymore. Unlike
   176    // other statuses, this value is set after a timeout.
   177    //
   178    // Bot.status_msg shall not be set.
   179    MISSING = 1;
   180    // Bot was quarantined by the server.
   181    //
   182    // Bot.status_msg shall include the server provided rationale.
   183    QUARANTINED_BY_SERVER = 2; // Not used yet. https://crbug.com/757931
   184    // Bot self-reported as unhealthy.
   185    //
   186    // This is what is called 'quarantined' in the old API.
   187    //
   188    // Bot.status_msg shall include the bot provided rationale.
   189    QUARANTINED_BY_BOT = 3;
   190  
   191    // Overhead states, healthy but unproductive
   192  
   193    // Bot self-reported as unable to run tasks due to externally induced
   194    // overhead.
   195    //
   196    // Examples include:
   197    // - The temperature of the DUT (Device Under Test) is too high, and the bot
   198    //   is waiting for it to cool down
   199    // - The host is doing self-cleaning work out of the bot's control (puppet
   200    //   is running), etc.
   201    //
   202    // Bot.status_msg shall include the bot provided rationale.
   203    OVERHEAD_MAINTENANCE_EXTERNAL = 4;
   204    // Bot self-reported as unable to run tasks due to doing internal overhead.
   205    //
   206    // Examples include:
   207    // - Running hooks
   208    // - Cleaning up or verifying its local cache
   209    // - Bot is starting for a version upgrade
   210    //
   211    // Bot.status_msg shall disambiguate the type of work item done.
   212    OVERHEAD_BOT_INTERNAL = 5;  // Not used yet. https://crbug.com/870723
   213    // Bot is down as its host is rebooting and contact was lost.
   214    //
   215    // If the bot doesn't contact back soon enough, it will be considered MISSING.
   216    //
   217    // Bot.status_msg shall not be set.
   218    HOST_REBOOTING = 6;  // Not used yet. https://crbug.com/870723
   219  
   220    // Healthy states
   221  
   222    // Running a task.
   223    //
   224    // Bot.status_msg shall not be set.
   225    BUSY = 7;
   226    // Bot is 'reserved' for operations outside of normal operations. This can be
   227    // relevant for SUT (System Under Test).
   228    //
   229    // Bot.status_msg shall not be set.
   230    RESERVED = 8;  // Not used yet. https://crbug.com/913978
   231    // Bot is healthy and waiting for tasks.
   232    //
   233    // Bot.status_msg shall not be set.
   234    IDLE = 9;
   235  }
   236  
   237  // Bot reported informational state. This can be used to describe the host,
   238  // the bot itself and the DUT (Device Under Test) under control as applicable.
   239  //
   240  // This is NOT used for task selection.
   241  message BotInfo {
   242    // supplemental contains the free form JSON data that includes interesting
   243    // information about the bot that doesn't fit in any of the fields below.
   244    //
   245    // Anything that is usable by multiple customers should eventually be moved to
   246    // a new field below.
   247    google.protobuf.Struct supplemental = 1;
   248    // Bot's version. An opaque value.
   249    //
   250    // This value is Swarming instance and configuration dependent. Bots are
   251    // updated through the process described at
   252    // https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Bot.md#update
   253    string version = 2;
   254    // External IP address as visible by the server.
   255    //
   256    // This could be a NAT'ing router external IP.
   257    //
   258    // Can be either IPv4 or IPv6.
   259    string external_ip = 3;
   260    // Authentication identity that the bot identified as. An opaque value.
   261    string authenticated_as = 4;
   262  
   263    // State of the content addressed cache on the bot. This is used for input
   264    // files.
   265    CASStats cas_stats = 5;
   266    // State of the named caches (used by incremental tasks) on the bot. This is
   267    // used for tasks that benefit from incrementality, like builds.
   268    //
   269    // Should be sorted by name.
   270    repeated NamedCacheStats named_caches_stats = 6;
   271    // State of the CIPD packages cache on the bot. This is used for installable,
   272    // versioned packages.
   273    //
   274    // Should be sorted by package name, then version.
   275    repeated CIPDPackageCacheStats cipd_packages_cache_stats = 7;
   276  
   277    // Information about the host.
   278    PhysicalEntity host = 8;
   279    // Information about the devices connected to the host.
   280    //
   281    // This can be the DUT (Device Under Test) or other peripherals.
   282    repeated PhysicalEntity devices = 9;
   283  
   284    // Timestamp of the bot's last activity. Used in BOT_MISSING events.
   285    google.protobuf.Timestamp last_seen_ts = 10;
   286  
   287    // The time when the bot became idle.
   288    google.protobuf.Timestamp idle_since_ts = 11;
   289  }
   290  
   291  // PhysicalEntity includes information about a host or device.
   292  //
   293  // This can be the host where the bot runs, or a device under control of the
   294  // bot.
   295  //
   296  // If the bot runs inside a docker container, this information is about the
   297  // container, or whatever the bot can observe from its vantage point.
   298  message PhysicalEntity {
   299    // Name that represents this physical entity.
   300    //
   301    // For a host, it shall be the hostname. For a device, it should be the device
   302    // hostname, if any. Failing that, something that makes sense to the users.
   303    string name = 1;
   304    // supplemental contains the free form JSON data that includes interesting
   305    // information about the device that doesn't fit in any of the fields below.
   306    //
   307    // Anything that is usable by multiple customers should eventually be moved to
   308    // a new field below.
   309    google.protobuf.Struct supplemental = 2;
   310  
   311    // IP address as visible by the bot process (bot_main) itself.
   312    //
   313    // In the case of the host, it will be one of the IP addresses assigned to it.
   314    // In the case of the host where the bot is running inside docker, it will be
   315    // the IP address assigned to the docker container.
   316    // In the case of a device, it is the IP address of the device, if any.
   317    //
   318    // Can be either IPv4 or IPv6.
   319    string ip = 3;
   320  
   321    // TODO(maruel): https://crbug.com/916570
   322    // - Temperature, already included in state for most hosts and devices
   323    // - disks, already included in state for host, can be added for devices
   324    // - OS version. The OS version is repeated here since the dimension 'os'
   325    //   could be about the DUT (device under test) or the host.
   326  }
   327  
   328  // Bot local content addressed cache information.
   329  message CASStats {
   330    int64 number_items = 1;  // Presumably the number of items in the cache.
   331    int64 size = 2;  // Presumably the cache size in bytes; TODO confirm unit.
   332    google.protobuf.Timestamp oldest_time = 3;  // Presumably of the oldest item.
   333  }
   334  
   335  // Bot local named cache information.
   336  message NamedCacheStats {
   337    string name = 1;  // Name of the named cache.
   338    int64 size = 2;  // Presumably the cache size in bytes; TODO confirm unit.
   339    google.protobuf.Timestamp last_use_time = 3;  // Presumably the last use.
   340  }
   341  
   342  // Bot local CIPD package cache information.
   343  message CIPDPackageCacheStats {
   344    string name = 1;  // CIPD package name.
   345    string version = 2;  // CIPD package version.
   346    int64 size = 3;  // Presumably the package size in bytes; TODO confirm unit.
   347    google.protobuf.Timestamp last_use_time = 4;  // Presumably the last use.
   348  }
   349  
   350  // BotEventType defines the reason why BotEvent was created.
   351  enum BotEventType {
   352    // Invalid bot event type, do not use.
   353    BOT_EVENT_TYPE_UNSPECIFIED = 0;
   354  
   355    // Bot specific events that are outside the scope of a task.
   356  
   357    // Bot connected and started a new session.
   358    //
   359    // BotEvent.event_msg shall not be set.
   360    BOT_NEW_SESSION = 1;
   361  
   362    // Currently unused.
   363    BOT_INTERNAL_FAILURE = 2;
   364  
   365    // Bot had a hook error to report to the server. This shall send a report to
   366    // the administrator of the instance.
   367    //
   368    // BotEvent.event_msg shall contain the error message.
   369    BOT_HOOK_ERROR = 3;
   370  
   371    // Bot hook logged information. The bot hooks can log locally to the local log
   372    // file, which itself can be streamed out of band. For special notifications
   373    // that are worth notifying the administrator, this event can be used to raise
   374    // these. Due to the cost of doing an RPC just for this, this should be used
   375    // sparingly compared to local logging.
   376    //
   377    // BotEvent.event_msg shall contain the log entry.
   378    BOT_HOOK_LOG = 4;
   379  
   380    // Bot initiated a host reboot. An example is a bot hook requesting to reboot
   381    // the host after a task failure.
   382    //
   383    // BotEvent.event_msg shall contain the reason for rebooting the host, if any.
   384    BOT_REBOOTING_HOST = 5;
   385    // Bot is shutting down. It may be restarting for an update.
   386    //
   387    // BotEvent.event_msg shall contain the reason.
   388    BOT_SHUTDOWN = 6;
   389  
   390    // Currently unused.
   391    BOT_DELETED = 7;
   392  
   393    // Bot is missing. There has been no communication from the bot for longer
   394    // than the deadline configured on the server side.
   395    BOT_MISSING = 8;
   396  
   397    // Bot polling results; these are commands sent to the bot to do actions.
   398  
   399    // The server instructs the bot to stay idle. This is when there is no task
   400    // pending for this bot. Will only be stored when there are other state
   401    // changes.
   402    //
   403    // BotEvent.event_msg shall not be set.
   404    INSTRUCT_IDLE = 10;
   405    // The server instructs the bot to start a task.
   406    //
   407    // BotEvent.event_msg shall not be set. BotEvent.bot.current_task_id shall
   408    // contain the task ID.
   409    INSTRUCT_START_TASK = 11;
   410    // The server instructs the bot to restart without self-updating. This is to
   411    // initiate a new bot session, with potentially new bot hooks.
   412    //
   413    // BotEvent.event_msg can be set to the rationale, if any.
   414    INSTRUCT_RESTART_BOT = 12;
   415    // The server instructs the bot to self-update.
   416    //
   417    // BotEvent.event_msg shall be set to the version to update to.
   418    // BotEvent.bot.info.version contains the bot's previous version.
   419    INSTRUCT_UPDATE_BOT_CODE = 13;
   420    // The server instructs the bot to stop its process.
   421    //
   422    // BotEvent.event_msg shall not be set. BotEvent.bot.current_task_id shall
   423    // contain the task ID.
   424    INSTRUCT_TERMINATE_BOT = 14;
   425  
   426    // Task lifecycle events as processed by the bot. In these event types,
   427    // BotEvent.bot.current_task_id shall be set.
   428  
   429    // Bot completed a task.
   430    //
   431    // BotEvent.event_msg shall not be set. BotEvent.bot.current_task_id shall
   432    // contain the task ID.
   433    TASK_COMPLETED = 20;
   434    // Bot had an internal failure (RAN_INTERNAL_FAILURE) to report to the server
   435    // while processing a task. This shall send a report to the administrator of
   436    // the instance and service author.
   437    //
   438    // This event shall not be filed in case of a MISSING_INPUTS.
   439    //
   440    // BotEvent.event_msg shall contain the error message.
   441    // BotEvent.bot.current_task_id shall contain the task ID.
   442    TASK_INTERNAL_FAILURE = 21;
   443    // Bot is forcibly killing the task.
   444    //
   445    // This can be induced by a server side request (KILLED, PREEMPTED) or by a
   446    // bot side decision (TIMED_OUT, TIMED_OUT_SILENCE).
   447    //
   448    // BotEvent.event_msg shall not be set. BotEvent.bot.current_task_id shall
   449    // contain the task ID.
   450    TASK_KILLED = 22;
   451  }
   452  
   453  // BotEvent represents an event on the bot.
   454  //
   455  // This message is used both in the API and as a BigQuery table description for
   456  // the table 'bot_events' in dataset 'swarming'.
   457  message BotEvent {
   458    google.protobuf.Timestamp event_time = 1;
   459  
   460    // Snapshot of the Bot that had this event.
   461    //
   462    // Eventually we'd want to only snapshot the difference from the previous
   463    // event, but this would make the SQL queries much more complicated.
   464    Bot bot = 2;
   465    // Type of state change (event) that triggered this message.
   466    BotEventType event = 3;
   467    // Supplemental information to describe the bot event. Human readable.
   468    //
   469    // See BotEventType for the meaning of this string for each event type.
   470    string event_msg = 4;
   471  }
   472  
   473  
   474  // Task scheduling.
   475  
   476  // Defines a Content Addressed Storage (a cache in practice) data tree
   477  // reference, normally a reference to a .isolated file.
   478  //
   479  // Deprecated: Isolate server is being migrated to RBE-CAS. Use `CASReference`
   480  // for the digest on RBE-CAS.
   481  //
   482  // This can be used to refer to either a task's inputs or a task's outputs.
   483  //
   484  // The .isolated file format is defined at
   485  // https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/isolate/doc/Design.md#file-format
   486  // It is a JSON file listing all the inputs.
   487  //
   488  // It is very different from RBE's CAS format, which uses a Merkle tree of
   489  // protobuf files.
   490  message CASTree {
   491    // server is one of:
   492    // - The isolated server to fetch (or push) content from. Must contain
   493    //   "https://" or "http://" prefix.
   494    // - The Google Cloud Project name hosting the RBE CAS.
   495    string server = 1;
   496    // The hex encoded hash of an isolated archive. It is expected to be a SHA-1
   497    // (40 characters) or SHA-256 (64 characters), based on the namespace value
   498    // below.
   499    string digest = 2;
   500    // Namespace on the isolate server. This currently defines the hashing
   501    // algorithm and compression algorithm but is currently loosely defined.
   502    //
   503    // A prefix "sha256-" defines a SHA-256 hashing. Defaults to SHA-1.
   504    // A suffix "-deflate" or "-gzip" defines a deflate algorithm.
   505    //
   506    // When referring to a RBE CAS instance, the namespace must be set to
   507    // "sha256-GCP". The GCP RBE CAS requires SHA-256 and doesn't support
   508    // precompressed data.
   509    string namespace = 3;
   510  }
   511  
   512  message Digest {
   513    // This is a [Digest][build.bazel.remote.execution.v2.Digest] of a blob on
   514    // RBE-CAS. See the explanations at the original definition.
   515    // https://github.com/bazelbuild/remote-apis/blob/77cfb44a88577a7ade5dd2400425f6d50469ec6d/build/bazel/remote/execution/v2/remote_execution.proto#L753-L791
   516    string hash = 1;  // Hash of the blob.
   517    int64 size_bytes = 2;  // Size of the blob, in bytes.
   518  }
   519  
   520  message CASReference {  // Reference to a digest on an RBE-CAS instance.
   521    // Full name of RBE-CAS instance. `projects/{project_id}/instances/{instance}`.
   522    // e.g. projects/chromium-swarm/instances/default_instance
   523    string cas_instance = 1;
   524    // CAS digest, consisting of a hash and a size in bytes.
   525    Digest digest = 2;
   526  }
   527  
   528  // Defines one CIPD package to install prior to running the task.
   529  //
   530  // CIPD packages are versioned and ACL'ed packages that are meant for tools that
   531  // are kept for a long time.
   532  message CIPDPackage {
   533    // The template for the CIPD package name that will have its variables
   534    // evaluated, e.g. "infra/tools/authutil/${platform}".
   535    //
   536    // TODO(vadimsh): Link to the documentation of the usable variables.
   537    string package_name = 1;
   538    // Valid package version for the requested package.
   539    string version = 2;
   540    // Path to directory relative to the task's root dir, where the package is to
   541    // be installed.
   542    //
   543    // If empty, the package will be installed at the root of the mapped
   544    // directory. If file names in the package and in the isolate clash, it will
   545    // cause a failure.
   546    string dest_path = 3;
   547  }
   548  
   549  // Describes a named cache that should be reused on the bot.
   550  //
   551  // A NamedCacheEntry in a task specifies that the task wants a directory to be
   552  // persisted on the bot across tasks.
   553  //
   554  // The cache directory is created at <run_dir>/|path|. If the cache was not
   555  // present on the bot before the task's execution, the directory is empty when
   556  // the task starts. Any change done in the directory by the task is persisted on
   557  // the bot after the task completes.
   558  //
   559  // If another task runs on the same bot and requests the same named cache, even
   560  // if mapped to a different path, it will get the updated content.
   561  message NamedCacheEntry {
   562    // Unique name of the cache. Required. Length is limited to 4096.
   563    string name = 1;
   564    // Path to directory relative to the task's root dir, where the named cache is
   565    // to be installed.
   566    //
   567    // A path cannot be shared among multiple caches or CIPD installations.
   568    // A task will fail if a file/dir with the same name already exists.
   569    string dest_path = 2;
   570  }
   571  
   572  // Defines the type of containment to use to put the task primary process
   573  // inside.
   574  //
   575  // TODO(maruel): https://crbug.com/808836
   576  //
   577  // This is highly OS specific:
   578  // - Lower the integrity level on Windows. https://crbug.com/916586
   579  // - Job Object on Windows. https://crbug.com/732818
   580  // - Docker on Linux or Windows. https://crbug.com/916584
   581  // - cgroup on Linux. https://crbug.com/764493
   582  // - Creating a temporary user on Windows and macOS. https://crbug.com/916585
   583  // - Lightweight home directory override on Windows, Linux and macOS.
   584  //   https://crbug.com/811411
   585  message Containment {
   586    enum ContainmentType {
   587      // Historical value, not specified. Containment may or may not be used.
   588      NOT_SPECIFIED = 0;
   589      // No containment, the default for now.
   590      NONE = 1;
   591      // Use the containment appropriate on the platform.
   592      AUTO = 2;
   593      // Use Job Object on Windows. Will fail if used on other platforms.
   594      JOB_OBJECT = 3;
   595    }
   596  
   597    // Lowers the priority of the task process when started. Doesn't require
   598    // containment. This gives the bot a chance to survive when the task starts an
   599    // overwhelming number of child processes.
   600    bool lower_priority = 1;
   601  
   602    // Defines the type of containment used.
   603    ContainmentType containment_type = 2;
   604  
   605    // The values below require a form of containment to be enforced.
   606  
   607    // Limits the number of concurrent active processes.
   608    int64 limit_processes = 3;
   609    // Limits the total amount of memory allocated by processes.
   610    int64 limit_total_committed_memory = 4;
   611  }
   612  
   613  // The user to run the task as.
   614  enum User {
   615    // Unspecified; will default to pool-level defaults.
   616    USER_UNSPECIFIED = 0;
   617    // Historical value; the task runs as the same user that Swarming runs
   618    // as.
   619    USER_SWARMING = 1;
   620    // Use the new, unprivileged user to run the task.
   621    USER_TEMPORARY = 2;
   622  }
   623  
   624  // Defines the 'what' to run.
   625  //
   626  // A serialization of this message is hashed and this hash is what is used for
   627  // task deduping.
   628  // NEXT_ID: 20
   629  message TaskProperties {
   630    // Inputs.
   631  
   632    // Isolated inputs to map in the working directory.
   633    //
   634    // Deprecated: Isolate server is being migrated to RBE-CAS. `cas_input_root`
   635    // will be used to reference to the input root on RBE-CAS.
   636    // Deprecated: the isolated file may optionally specify a command to run.
   637    // Otherwise, 'command' must be specified.
   638    CASTree cas_inputs = 1;
   639  
   640    // Digest of the input root on RBE-CAS.
   641    // cas_input_root.digest MUST be digest of [build.bazel.remote.execution.v2.Directory].
   642    CASReference cas_input_root = 18;
   643    // Defines the set of CIPD packages to install prior to running the task.
   644    //
   645    // These packages are meant to be software that is needed (a dependency) by
   646    // the task being run. Unlike isolated files from cas_inputs, the CIPD
   647    // packages do not expire from the server.
   648    //
   649    // Items must be sorted per the CIPD package name.
   650    repeated CIPDPackage cipd_inputs = 2;
   651    // Specifies named caches to map into the working directory. These caches
   652    // outlive the task, which can then be reused by tasks later used on this bot
   653    // that request the same named cache.
   654    //
   655    // Items must be sorted per the named cache name.
   656    repeated NamedCacheEntry named_caches = 3;
   657    // Command to run. This has priority over a command specified in the isolated
   658    // files.
   659    repeated string command = 4;
   660    // Relative working directory to start the 'command' in, defaults to the root
   661    // mapped directory or what is provided in the isolated file, if any.
   662    string relative_cwd = 5;
   663    // Extraneous arguments to append to the command specified in the isolated
   664    // file. Can only be used when an isolated file specifies a command.
   665    //
   666    // Deprecated.
   667    repeated string extra_args = 6;
   668    // Secret bytes to provide to the task. Write only, cannot be retrieved back.
   669    bytes secret_bytes = 7;
   670    // When retrieved back, has_secret_bytes is set to true.
   671    bool has_secret_bytes = 8;
   672  
   673    // Environment.
   674  
   675    // Dimensions are what is used to determine which bot can run the task.
   676    //
   677    // The values are effectively an AND, a bot must match all dimensions to be
   678    // selected to run the task.
   679    //
   680    // Items must be sorted.
   681    repeated StringListPair dimensions = 9;
   682    // Environment variables to set when running the task.
   683    //
   684    // Items must be sorted.
   685    repeated StringPair env = 10;
   686    // Task root relative paths to prepend to a given environment variable.
   687    //
   688    // This allows one to safely modify variables like PATH, PYTHONPATH, or other
   689    // PATH-like environment variables. The order of operations is:
   690    // * Turn slashes into native-platform slashes
   691    // * Make the path absolute
   692    // * Prepend it to the current value of the envvar using the os-native list
   693    //   separator (`;` on Windows, `:` on POSIX)
   694    //
   695    // Each key can have multiple paths to prepend. They will be prepended in
   696    // the order seen here.
   697    //
   698    // For example, if env_paths is:
   699    //   [ (key="PATH", values=["foo", "bar"]),
   700    //     (key="CUSTOMPATH", values=["custom"]), ]
   701    //
   702    // The task would see:
   703    //   PATH=/path/to/swarming/rundir/foo:/path/to/swarming/rundir/bar:$PATH
   704    //   CUSTOMPATH=/path/to/swarming/rundir/custom
   705    //
   706    // Paths must always be specified here with forward-slashes, and must not
   707    // attempt to escape the task's root (i.e. must not contain `..`).
   708    //
   709    // This is applied AFTER evaluating `env`.
   710    //
   711    // Items must be sorted by key, but exceptionally not by values.
   712    repeated StringListPair env_paths = 11;
   713    // Declare what kind of containment shall be used to run the task process
   714    // in.
   715    Containment containment = 12;  // Not used yet. https://crbug.com/808836
   716  
   717    // Timing.
   718  
   719    // Maximum number of seconds the task can run before its process is forcibly
   720    // terminated and the task results in TIMED_OUT.
   721    google.protobuf.Duration execution_timeout = 13;
   722    // Maximum number of seconds the task may be silent (no output to stdout nor
   723    // stderr) before it is considered hung and is forcibly terminated early and
   724    // the task results in TIMED_OUT_SILENCE.
   725    google.protobuf.Duration io_timeout = 14;
   726    // Number of seconds to give the child process after a SIGTERM before sending
   727    // a SIGKILL. See ../../doc/Bot.md#timeout-handling
   728    google.protobuf.Duration grace_period = 15;
   729  
   730    // True if the task does not access any service through the network and is
   731    // believed to be certain to produce the same output given the same input. In
   732    // the case of a successful task, previous results will be reused if possible,
   733    // leading to DEDUPED task result for the tasks that could reuse previous
   734    // task's outcome.
   735    bool idempotent = 16;
   736  
   737    // Paths in the working directory to archive back and store as
   738    // TaskResult.outputs.
   739    //
   740    // Items must be sorted.
   741    repeated string outputs = 17;
   742  
   743    // User to run the task as.
   744    User user = 19;
   745  }
   746  
   747  // Defines a possible task execution for a task request to be run on the
   748  // Swarming infrastructure.
   749  //
   750  // When there is more than one TaskSlice specified in TaskRequest, the second
   751  // TaskSlice onwards represent possible fallbacks.
   752  message TaskSlice {
   753    // The properties of the task to try to run.
   754    //
   755    // If there is no bot that can serve this properties.dimensions when this task
   756    // slice is enqueued, it is immediately denied. This can trigger if:
   757    // - There is no bot with these dimensions currently known (NO_RESOURCE).
   758    // - Bots that could run this task are either all missing or quarantined.
   759    TaskProperties properties = 1;
   760    // If this task slice is not scheduled after waiting this long, the next one
   761    // will be processed.
   762    google.protobuf.Duration expiration = 2;
   763    // When a task is scheduled and there are currently no bots available to run
   764    // the task, the TaskSlice can either be PENDING, or be denied immediately.
   765    // When denied, the next TaskSlice is enqueued, and if there's no following
   766    // TaskSlice, the task state is set to NO_RESOURCE. This should normally be
   767    // set to False to avoid unnecessary waiting.
   768    bool wait_for_capacity = 3;
   769  
   770    // Digest of a serialized form of TaskProperties.
   771    //
   772    // This is used for DEDUPED and PENDING_DEDUPING when idempotent is true.
   773    // Consider this value as an opaque string; only use it to check equality.
   774    //
   775    // It is set even if idempotent is false.
   776    string properties_hash = 4;
   777  }
   778  
   779  // This message is used to create a new task and can be retrieved back, except
   780  // for a few write-only fields.
   781  //
   782  // A TaskRequest is immutable, it cannot be updated once created.
   783  message TaskRequest {
   784    // Scheduling: what to run, when to run, under which service account.
   785  
   786    // List of TaskSlice, along with their scheduling parameters.
   787    //
   788    // This defines all the various possible task execution for a task request to
   789    // be run on the Swarming infrastructure. They are processed in order, and it
   790    // is guaranteed that at most one of these will be processed.
   791    //
   792    // At least one must be specified, and a maximum number of 8 can be included.
   793    repeated TaskSlice task_slices = 1;
   794    // Task priority, the lower the more important.
   795    //
   796    // Valid values are between 1 and 255.
   797    int32 priority = 2;
   798    // Defines what OAuth2 credentials the task uses when calling other services.
   799    //
   800    // Possible values are:
   801    //   - 'none': do not use a task service account at all, this is the default.
   802    //   - 'bot': use bot's own account, works only if bots authenticate with
   803    //       OAuth2.
   804    //   - <some email>: use this specific service account if it is allowed in the
   805    //       pool (via 'allowed_service_account' pools.cfg setting) and configured
   806    //       in the token server's service_accounts.cfg.
   807    //
   808    // Note that the service account name is specified outside of task properties,
   809    // and thus it is possible to have two tasks with different service accounts,
   810    // but identical properties hash (so one can be deduped). If this is
   811    // unsuitable use 'idempotent=False' or include a service account name in
   812    // properties separately.
   813    string service_account = 3;
   814  
   815    // Task information metadata: doesn't affect what is run.
   816  
   817    // When the task was created.
   818    google.protobuf.Timestamp create_time = 4;
   819    // Task name for display purpose.
   820    //
   821    // Note: this value is not indexed. If you want to be able to query for tasks
   822    // based on names, use tags below.
   823    string name = 5;
   824    // Tags are 'key:value' strings that describe what the task is about (its
   825    // semantic meaning).
   826    //
   827    // It is fine to reuse the same 'key' multiple times. It is not fine to use a
   828    // key that is also used as a dimension.
   829    //
   830    // The tags are indexed, thus can be used for search with exact matches.
   831    //
   832    // Items must be sorted.
   833    repeated string tags = 6;
   834    // User for whom this task is run, if relevant. Not validated.
   835    string user = 7;
   836    // Authenticated client that triggered this task.
   837    string authenticated = 13;
   838    // Task realm is used to control who can interact with the task e.g. get,
   839    // cancel etc, and which task service accounts can be used in the realm.
   840    string realm = 14;
   841    // Swarming:ResultDB integration configuration for a task.
   842    ResultDBCfg resultdb = 15;
   843  
   844    // Task hierarchy and notifications
   845  
   846    // The task request ID.
   847    //
   848    // The request wasn't "run" so it is the same ID as the summary (ending with
   849    // '0').
   850    string task_id = 8;
   851    // Parent Swarming task summary ID of the process requesting this task.
   852    //
   853    // This points to the TaskResult.task_id (ending with '0'). Note that an
   854    // idempotent task can be automatically retried by Swarming, which may result
   855    // in two TaskResult with the same task_id but different run_id.
   856    //
   857    // This field is read-only and derived from parent_run_id. It cannot be
   858    // specified at task creation.
   859    string parent_task_id = 9;
   860    // Parent Swarming task run ID of the process requesting this task.
   861    //
   862    // This field is set on the children tasks when a Swarming task creates
   863    // children Swarming tasks.
   864    //
   865    // This points to the TaskResult.run_id (ending with '1', '2' or more).
   866    string parent_run_id = 11;
   867    // Root task id, independent of the depth of recursive tasks.
   868    string root_task_id = 16;
   869    // Root task run id, independent of the depth of recursive tasks.
   870    string root_run_id = 17;
   871  
   872    // Send notification to this pubsub topic for updates of this task.
   873    PubSub pubsub_notification = 10;
   874    // Maximum delay between bot pings before the bot is considered dead
   875    // while running a task.
   876    //
   877    // When a task is running, the bot sends updates to the server every
   878    // few seconds. In some cases, like when the system is overloaded,
   879    // the bot may be preempted and delayed in sending its updates.
   880    // After the delay specified here, the server will claim the bot to
   881    // be dead and will forcibly abort the task as BOT_DIED. This is to
   882    // catch system wide issues like a BSOD.
   883    google.protobuf.Duration bot_ping_tolerance = 12;
   884  }
   885  
   886  // PubSub is a Cloud Pub/Sub topic to send task updates to.
   887  //
   888  // For this to work, the Swarming's AppEngine service account must have
   889  // roles/pubsub.publisher role on the Cloud Pub/Sub topic.
   890  //
   891  // For a Swarming instance "FOOBAR.appspot.com", the service account to grant
   892  // publisher right is "FOOBAR@appspot.gserviceaccount.com".
   893  //
   894  // This is described at https://cloud.google.com/pubsub/docs/access-control.
   895  //
   896  // To grant Swarming instance FOOBAR.appspot.com publisher rights to topic
   897  // projects/PROJ/topics/TOP, use:
   898  //
   899  //   gcloud beta pubsub topics add-iam-policy-binding \
   900  //       TOP \
   901  //       --project PROJ \
   902  //       --member serviceAccount:FOOBAR@appspot.gserviceaccount.com \
   903  //       --role roles/pubsub.publisher
   904  //
   905  // See https://cloud.google.com/pubsub/docs/authentication for more
   906  // information.
   907  message PubSub {
   908    // Full topic name to post task state updates to, e.g.
   909    // "projects/<id>/topics/<id>".
   910    string topic = 1;
   911    // Secret string to put into "auth_token" attribute of PubSub messages.
   912    //
   913    // This value is write only, it cannot be retrieved back.
   914    string auth_token = 2;
   915    // String to put into "userdata" attribute of PubSub messages.
   916    string userdata = 3;
   917  }
   918  
   919  // TaskResult is the result of a TaskRequest as it is processed by Swarming.
   920  //
   921  // The TaskResult represents one attempt (run on a bot) and/or the final result
   922  // (summary). When the task never ran (for example EXPIRED), there's one summary
   923  // but no run.
   924  //
   925  // An idempotent task can be automatically retried by Swarming, which may result
   926  // in two TaskResult with the same task_id but different run_id; two runs, one
   927  // summary.
   928  //
   929  // A retry is done when a task fails with a retriable error (for example with
   930  // RAN_INTERNAL_FAILURE). From the client's perspective when looking at the
   931  // summary (ID ending with '0'), the task went from PENDING to RUNNING and then
   932  // back to PENDING.
   933  //
   934  // When stored in BigQuery in tables task_result_run and task_results_summary,
   935  // on-going tasks are in the __NULL__ partition since end_time is unset.
   936  //
   937  // There's a risk of duplicate rows because BigQuery is eventually consistent
   938  // with regards to duplicate rows. Set your filter to ignore the __NULL__
   939  // partition to enforce strong consistency and ignore on-going tasks. See
   940  // https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataconsistency
   941  // for more information.
   942  // NEXT_ID: 23
   943  message TaskResult {
   944    TaskRequest request = 1;
   945  
   946    // Timing information.
   947  
   948    // Time the task was requested.
   949    google.protobuf.Timestamp create_time = 2;
   950    // Time the task started being run by a bot, before RUNNING_OVERHEAD_SETUP.
   951    //
   952    // Doing "start_time - create_time" gives the task pending time.
   953    google.protobuf.Timestamp start_time = 3;
   954    // Time when the task was abandoned instead of normal completion.
   955    //
   956    // This happens for example when a task was KILLED, this then represents the
   957    // time a client requested the task to be killed, which is before end_time.
   958    // Same for TIMED_OUT state, this then represents the time when the bot
   959    // decided to abort the task.
   960    google.protobuf.Timestamp abandon_time = 4;
   961    // Time the task completed and was torn down, after RUNNING_OVERHEAD_TEARDOWN.
   962    //
   963    // Doing "end_time - start_time" will not lead to the exact task duration,
   964    // since this time frame includes overheads.
   965    google.protobuf.Timestamp end_time = 5;
   966    // Duration of the task. This excludes overheads.
   967    google.protobuf.Duration duration = 6;
   968  
   969    // Execution information.
   970  
   971    // Current state of the task (e.g. PENDING, RUNNING, COMPLETED, EXPIRED, etc).
   972    TaskState state = 7;
   973    // The category of the current task state. This is primarily useful to
   974    // simplify BigQuery queries. This can be used to determine if a task is done
   975    // or if still considered for execution.
   976    TaskStateCategory state_category = 8;
   977    // The task try number.
   978    //
   979    // It is 0 for a deduped task, since nothing ran. It is 0 if the task is still
   980    // PENDING.
   981    //
   982    // It is normally 1 for a task that started running and runs a normal flow.
   983    //
   984    // A number above 1 means that the task was tried multiple times. It can
   985    // be due to a previous try resulting in a task state in the category
   986    // CATEGORY_TRANSIENT_DONE.
   987    int32 try_number = 9;
   988    // Index in the TaskRequest.task_slices (TaskSlice instance) that this result
   989    // represents. This is updated when a TaskSlice is enqueued to run. It can be
   990    // updated until the task state is in either category CATEGORY_EXECUTION_DONE
   991    // or CATEGORY_NEVER_RAN_DONE.
   992    //
   993    // The TaskSlice contains a TaskProperties, which defines what is run.
   994    int32 current_task_slice = 10;
   995    // Snapshot of the bot that was assigned to this task at the start of the
   996    // task. This includes bot local cache information.
   997    Bot bot = 11;
   998    // Server versions that touched this task.
   999    //
  1000    // A different version of the server may get the request and hand it to the
  1001    // bot. This is primarily useful to detect if a new server version introduced
  1002    // a bug and for canarying purpose.
  1003    repeated string server_versions = 12;
  1004  
  1005    // Task identity and hierarchy information.
  1006  
  1007    // List of children task IDs that this task triggered, if any.
  1008    //
  1009    // This happens only in the case of reentrant tasks, a Swarming task that
  1010    // itself triggers more Swarming tasks. Each of these tasks will have 'run_id'
  1011    // set as their 'TaskRequest.parent_run_id'.
  1012    repeated string children_task_ids = 13;
  1013    // Task ID whose results were reused for state DEDUPED.
  1014    //
  1015    // This is the run_id (ending with '1', '2' or more).
  1016    string deduped_from = 14;
  1017    // Summary task ID (ending with '0') when creating a new task.
  1018    string task_id = 15;
  1019    // Actual executed task id that this task represents.
  1020    //
  1021    // This value is only set if it ran, that is, the task went through one of the
  1022    // states in CATEGORY_RUNNING.
  1023    //
  1024    // A task_id can have multiple run_id associated to it, they will have the
  1025    // corresponding try_number incremented starting at 1.
  1026    string run_id = 16;
  1027  
  1028    // Task metadata for inputs (reproducibility) and performance.
  1029  
  1030    // Listing of the actual pinned CIPDPackages that the task used.
  1031    //
  1032    // These can vary from the input packages if the inputs included non-identity
  1033    // versions (e.g. a ref like "latest"). This can be available once task setup
  1034    // is completed.
  1035    CIPDPins cipd_pins = 17;
  1036    // Statistics about overhead for an isolated task. This is populated as the
  1037    // task goes through setup, execution and teardown.
  1038    TaskPerformance performance = 18;
  1039  
  1040    // Task's process result.
  1041  
  1042    // Process exit code if relevant. May be forcibly set to -1 in exceptional
  1043    // cases.
  1044    sint64 exit_code = 19;
  1045    // Isolated outputs, if any.
  1046    // Deprecated: `cas_output_root` will be used.
  1047    CASTree outputs = 20;
  1048    // RBE-CAS output, if any.
  1049    CASReference cas_output_root = 21;
  1050  
  1051    // ResultDB related properties for this task.
  1052    ResultDBInfo resultdb_info = 22;
  1053  }
  1054  
  1055  // Defines pinned CIPD packages that were installed during the task.
  1056  message CIPDPins {
  1057    // The CIPD server where the CIPD packages were fetched from. Must contain
  1058    // "https://" or "http://" prefix.
  1059    //
  1060    // This field or its subfields are optional if a default CIPD client is
  1061    // defined in the server config.
  1062    string server = 1;
  1063    // The pinned package and version of the CIPD client that was actually used.
  1064    CIPDPackage client_package = 2;
  1065    // List of CIPD packages that were installed in the task with fully resolved
  1066    // package names and versions.
  1067    repeated CIPDPackage packages = 3;
  1068  }
  1069  
  1070  // Information about the task's performance.
  1071  message TaskPerformance {
  1072    // Total cost of running this task in $USD. In the case of DEDUPED task, this
  1073    // represents the amount saved.
  1074    float cost_usd = 1;
  1075    // Total overhead caused by the bot.
  1076    // This is calculated by subtracting the command duration measured in
  1077    // run_isolated.py from the total duration in task_runner.run_command().
  1078    // https://source.chromium.org/chromium/infra/infra/+/master:luci/appengine/swarming/swarming_bot/bot_code/task_runner.py;l=811;drc=dfa14c6863d14a5969bb4fea08846985d89aed76
  1079    google.protobuf.Duration total_overhead = 5;
  1080    // Overhead that is caused by the bot server that is not accounted for by the
  1081    // other overheads.
  1082    google.protobuf.Duration other_overhead = 2;
  1083    // Deprecated: use setup_overhead instead.
  1084    // Task environment setup overhead. This is the task state
  1085    // RUNNING_OVERHEAD_SETUP.
  1086    TaskOverheadStats setup = 3 [deprecated = true];
  1087    // Deprecated: use teardown_overhead instead.
  1088    // Task environment teardown overhead. This is the task state
  1089    // RUNNING_OVERHEAD_TEARDOWN.
  1090    TaskOverheadStats teardown = 4 [deprecated = true];
  1091    // Task environment setup overhead. This is the task state
  1092    // RUNNING_OVERHEAD_SETUP.
  1093    TaskSetupOverhead setup_overhead = 6;
  1094    // Task environment teardown overhead. This is the task state
  1095    // RUNNING_OVERHEAD_TEARDOWN.
  1096    TaskTeardownOverhead teardown_overhead = 7;
  1097  }
  1098  
  1099  // Overhead information about setup.
  1100  message TaskSetupOverhead {
  1101    // Duration of this overhead.
  1102    google.protobuf.Duration duration = 1;
  1103    // Cache trimming overhead.
  1104    CacheTrimOverhead cache_trim = 2;
  1105    // CIPD package installation overhead.
  1106    CIPDOverhead cipd = 3;
  1107    // Named cache install overhead.
  1108    NamedCacheOverhead named_cache = 4;
  1109    // CAS download overhead.
  1110    CASOverhead cas = 5;
  1111  }
  1112  
  1113  // Overhead information about task teardown.
  1114  message TaskTeardownOverhead {
  1115    // Duration of this overhead.
  1116    google.protobuf.Duration duration = 1;
  1117    // CAS upload overhead.
  1118    CASOverhead cas = 2;
  1119    // Named cache uninstall overhead.
  1120    NamedCacheOverhead named_cache = 3;
  1121    // Directory cleanup overhead.
  1122    CleanupOverhead cleanup = 4;
  1123  }
  1124  
  1125  // Overhead information about cache trimming.
  1126  message CacheTrimOverhead {
  1127    google.protobuf.Duration duration = 1;  // Duration of this overhead.
  1128  }
  1129  
  1130  // Overhead information about CIPD package installation.
  1131  message CIPDOverhead {
  1132    // Duration of this overhead.
  1133    google.protobuf.Duration duration = 1;
  1134  }
  1135  
  1136  // Overhead information about named cache install or uninstall.
  1137  message NamedCacheOverhead {
  1138    // Duration of this overhead.
  1139    google.protobuf.Duration duration = 1;
  1140  }
  1141  
  1142  // Overhead information about a CAS download or upload.
  1143  message CASOverhead {
  1144    // Duration of this overhead.
  1145    google.protobuf.Duration duration = 1;
  1146  
  1147    // CAS entries that were not present in the local or remote cache and had to
  1148    // be sent across the network.
  1149    CASEntriesStats cold = 2;
  1150    // CAS entries that were in the cache and thus did not have to be transferred.
  1151    CASEntriesStats hot = 3;
  1152  }
  1153  
  1154  // Overhead information about the cleanup step.
  1155  message CleanupOverhead {
  1156    // Duration of this overhead.
  1157    google.protobuf.Duration duration = 1;
  1158  }
  1159  
  1160  // Deprecated: Use TaskSetupOverhead or TaskTeardownOverhead instead.
  1161  // Information about setup or teardown.
  1162  message TaskOverheadStats {
  1163    // Duration of this overhead.
  1164    google.protobuf.Duration duration = 1;
  1165  
  1166    // CAS entries that were not present in the local or remote cache and had to
  1167    // be sent across the network.
  1168    CASEntriesStats cold = 2;
  1169    // CAS entries that were in the cache and thus didn't have to be transferred.
  1170    CASEntriesStats hot = 3;
  1171  
  1172    // CIPD information:
  1173    // TODO(maruel): Add.
  1174  
  1175    // Named cache information:
  1176    // TODO(maruel): Add.
  1177  }
  1178  
  1179  // Statistics for differential CAS entries in the context of I/O for a task.
  1180  message CASEntriesStats {
  1181    int64 num_items = 1;
  1182    int64 total_bytes_items = 2;
  1183    // This buffer is compressed as deflate'd delta-encoded varints. This is the
  1184    // list of all the item sizes for an I/O operation, which can scale in the
  1185    // 100k range. So this can be large! See //client/utils/large.py for the code
  1186    // to handle these.
  1187    bytes items = 6;
  1188  }
  1189  
  1190  // TaskStateCategory represents the 5 different categories of task state.
  1191  //
  1192  // For active state categories (CATEGORY_RUNNING and CATEGORY_TRANSIENT_DONE),
  1193  // it is possible to go 'back' to CATEGORY_PENDING; for example, a task has an
  1194  // internal error, and the server reenqueues the task for a second try.
  1195  enum TaskStateCategory {
  1196    // Invalid value.
  1197    TASK_STATE_CATEGORY_UNSPECIFIED = 0;
  1198  
  1199    // Bit mask for the TaskState inside each category.
  1200    TASK_STATE_MASK = 0x0F;
  1201  
  1202    // The task is enqueued and pending bot availability.
  1203    CATEGORY_PENDING = 0x10;
  1204    // The task is running.
  1205    CATEGORY_RUNNING = 0x20;
  1206    // Transient done states are uncertain states; something ran but the result
  1207    // was inconclusive.
  1208    //
  1209    // They can trigger the Swarming internal retry mechanism. In this case, the
  1210    // "task try" will have this state, but the task summary will become PENDING.
  1211    // In case the task cannot be retried, when idempotent is false, then this
  1212    // becomes a final state.
  1213    CATEGORY_TRANSIENT_DONE = 0x30;
  1214    // The task ran, and it is done.
  1215    CATEGORY_EXECUTION_DONE = 0x40;
  1216    // The task did not run, and won't.
  1217    CATEGORY_NEVER_RAN_DONE = 0x50;
  1218  }
  1219  
  1220  // TaskState represents the different possible states for a Task.
  1221  //
  1222  // Each state is in one of the bitmask in TaskStateCategory.
  1223  enum TaskState {
  1224    // Invalid task state.
  1225    TASK_STATE_INVALID = 0;
  1226  
  1227    // Task states in PENDING_MASK:
  1228  
  1229    // The task is currently pending.
  1230    //
  1231    // This means that no bot reaped the task yet. It will stay in this state
  1232    // until either a bot reaps the task, or the expiration elapsed or all bots
  1233    // become MISSING, leading to a NO_RESOURCE. The task pending expiration is
  1234    // specified as TaskSlice.expiration, one per task slice.
  1235    //
  1236    // The task may go through multiple pending TaskSlice as they expire or are
  1237    // skipped due to NO_RESOURCE (see definition below). In this situation the
  1238    // task state still stays in PENDING state as long as there's a chance for a
  1239    // bot to reap the task.
  1240    PENDING = 0x10;
  1241    // The task is currently pending, but another previously scheduled task was
  1242    // identified to be deduped against, but the previously scheduled task hasn't
  1243    // completed yet.
  1244    //
  1245    // In this case, the task may go back into PENDING if the previous identical
  1246    // task failed, or immediately into DEDUPED if it succeeded.
  1247    PENDING_DEDUPING = 0x11;  // Not used yet, https://crbug.com/915342
  1248  
  1249    // Task states in RUNNING_MASK:
  1250  
  1251    // The task is currently running.
  1252    //
  1253    // For new tasks, this is only the actual tasks runtime. For old tasks, this
  1254    // includes RUNNING_OVERHEAD_START and RUNNING_OVERHEAD_END.
  1255    RUNNING = 0x20;
  1256    // The task is assigned to a bot. The bot is fetching input files and setting
  1257    // up the runtime environment.
  1258    RUNNING_OVERHEAD_SETUP = 0x21;  // Not used yet, https://crbug.com/796757
  1259    // Task completed and result metadata is available. Outputs and other
  1260    // associated logs are still being uploaded and the environment is being
  1261    // teared down.
  1262    //
  1263    // A client that only needs the exit code may chose to stop waiting for the
  1264    // task, as the task will end with COMPLETED, unless there's a failure during
  1265    // outputs upload, which would result in INTERNAL_FAILURE.
  1266    RUNNING_OVERHEAD_TEARDOWN = 0x22;  // Not used yet, https://crbug.com/813412
  1267    // The task is being forcibly terminated. This can be due to either a kill
  1268    // request, preemption or time out.
  1269    //
  1270    // See
  1271    // https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Bot.md#graceful-termination_aka-the-sigterm-and-sigkill-dance
  1272    TERMINATING = 0x23;  // Not used yet. https://crbug.com/916560
  1273    // Task completed, result metadata and task outputs are available. There's
  1274    // still some overhead being finished like attaching relevant bot logs to the
  1275    // task.
  1276    //
  1277    // The client can return right away unless infrastructure issue debugging is
  1278    // needed.
  1279    COMPLETING = 0x2F; // Not used yet, https://crbug.com/813412
  1280  
  1281    // Task states in TRANSIENT_DONE_MASK:
  1282  
  1283    // The task ran but the bot had an internal failure, unrelated to the task
  1284    // itself. It can be due to disk or network I/O issues.
  1285    RAN_INTERNAL_FAILURE = 0x30;
  1286    // The task ran and completed normally, but returned an exit code that was
  1287    // provided in the TaskProperties as signaling an hardware failure of the DUT
  1288    // (Device Under Test).
  1289    //
  1290    // As such, the task may need to be retried.
  1291    DUT_FAILURE = 0x31;  // Not used yet, https://crbug.com/902807
  1292    // The task started but the bot failed to keep the connection to the server
  1293    // alive. This can be due to the bot's host crashing, or network connectivity
  1294    // issues.
  1295    BOT_DISAPPEARED = 0x32;  // Not used yet. https://crbug.com/916553
  1296    // The task ran but was killed by the client or an external scheduler in a way
  1297    // that it should still be retried as another task try.
  1298    //
  1299    // This can happen via the external scheduler or an API yet to be defined. The
  1300    // rationale is to kill slow running low priority task, without disrupting the
  1301    // client and simply postponing the task for later.
  1302    PREEMPTED = 0x33;  // Not used yet. https://crbug.com/916559
  1303  
  1304    //
  1305    // All the states below are inactive final states.
  1306    //
  1307  
  1308    // Task states in EXECUTION_DONE_MASK:
  1309  
  1310    // The task ran and completed normally. The task process exit code may be 0 or
  1311    // another value.
  1312    //
  1313    // This value is also used when the task is deduped against a previous task.
  1314    COMPLETED = 0x40;
  1315    // The task ran for longer than the allowed time in
  1316    // TaskProperties.execution_timeout.
  1317    //
  1318    // This means the bot forcefully killed the task process as described in the
  1319    // graceful termination dance in the documentation.
  1320    TIMED_OUT = 0x41;
  1321    // The task timed out due to not sending updates to stdout or stderr within
  1322    // the period specified in TaskProperties.io_timeout.
  1323    //
  1324    // This means the bot forcefully killed the task process as described in the
  1325    // graceful termination dance in the documentation.
  1326    TIMED_OUT_SILENCE = 0x42;  // Not used yet. https://crbug.com/916556
  1327    // The task ran but was manually killed via the 'cancel' API.
  1328    //
  1329    // This means the bot forcefully killed the task process as described in the
  1330    // graceful termination dance in the documentation.
  1331    KILLED = 0x43;
  1332    // The task had specified invalid inputs. This is found out by the bot while
  1333    // RUNNING_OVERHEAD_SETUP.
  1334    //
  1335    // For example, the cas_inputs or cipd_inputs refers to missing items,
  1336    // or the requested containment cannot be achieved.
  1337    MISSING_INPUTS = 0x44;  // Not used yet. https://crbug.com/916553
  1338  
  1339    // Task states in NEVER_RAN_DONE_MASK:
  1340  
  1341    // The task didn't have to run, because a previous task had results. It is
  1342    // functionally equivalent to COMPLETED, except that previous results were
  1343    // returned as-is.
  1344    DEDUPED = 0x50;
  1345    // The task is not pending anymore; it never ran due to lack of capacity.
  1346    //
  1347    // This means that other higher priority tasks ran instead and that not enough
  1348    // bots were available to run this task for TaskSlice.expiration.
  1349    EXPIRED = 0x51;
  1350    // The task never ran, and was manually cancelled via the 'cancel' API before
  1351    // it was reaped.
  1352    CANCELED = 0x52;
  1353    // The task was never set to PENDING and was immediately refused, as the
  1354    // server determined that there is no bot capacity to run this task. This
  1355    // happens because no bot exposes a superset of the requested task dimensions.
  1356    //
  1357    // There can be a situation where a task goes from PENDING to NO_RESOURCE if
  1358    // capacity (bots) is removed.
  1359    //
  1360    // Set TaskSlice.wait_for_capacity to True to force the server to keep the
  1361    // task slice pending even in this case. Generally speaking, the task will
  1362    // eventually switch to EXPIRED, as there's no bot to run it. That said, there
  1363    // are situations where it is known that in some not-too-distant future a wild
  1364    // bot will appear that will be able to run this task.
  1365    NO_RESOURCE = 0x53;
  1366    // The task was valid but was denied due to a temporary capacity surcharge.
  1367    // The caller should try again after a delay, or surface the lack of capacity
  1368    // to the user.
  1369    LOAD_SHED = 0x54;  // Not used yet. https://crbug.com/916562
  1370    // The task is valid but was denied due to insufficient quota.
  1371    RESOURCE_EXHAUSTED = 0x55;  // Not used yet. https://crbug.com/916557
  1372    // The task never ran, the server had an internal failure, unrelated to the
  1373    // task itself. It can be due to a server bug or network I/O issues.
  1374    SKIPPED_INTERNAL_FAILURE = 0x56;  // Not used yet. https://crbug.com/916553
  1375    // The task encountered an error caused by the client. This means that
  1376    // rerunning the task with the same parameters will not change the result.
  1377    CLIENT_ERROR = 0x57;
  1378  }
  1379  
  1380  // Swarming:ResultDB integration configuration for a task.
  1381  // This is a copy of SwarmingRpcsResultDBCfg.
  1382  message ResultDBCfg {
  1383    // Flag indicating whether the integration is enabled.
  1384    bool enable = 1;
  1385  }
  1386  
  1387  // Properties tying a task to ResultDB.
  1388  message ResultDBInfo {
  1389    // Hostname of the ResultDB service, e.g. "results.api.cr.dev".
  1390    string hostname = 1;
  1391  
  1392    // Name of the ResultDB invocation that belongs to the task, e.g.
  1393    // "invocations/task-chromium-swarm.appspot.com-deadbeef1".
  1394    //
  1395    // Empty if Swarming:ResultDB integration was not enabled for the task.
  1396    //
  1397    // For a deduplicated task, this is the invocation name of the original
  1398    // task.
  1399    string invocation = 2;
  1400  }