go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/changelist/storage.proto (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  syntax = "proto3";
    16  
    17  package cv.internal.changelist;
    18  
        // Go import path of the package which holds the code generated from this file.
    19  option go_package = "go.chromium.org/luci/cv/internal/changelist";
    20  
        // Provides google.protobuf.Timestamp.
    21  import "google/protobuf/timestamp.proto";
        // Provides gerrit.ChangeInfo.
    22  import "go.chromium.org/luci/common/proto/gerrit/gerrit.proto";
    23  
    24  // Snapshot stores a snapshot of CL info as seen by CV at a certain time.
    25  //
    26  // When stored in CL entity, represents latest known Gerrit data.
    27  // When stored in RunCL entity, represents data pertaining to a fixed patchset.
    28  message Snapshot {
    29    // Next tag: 12.
          //
          // NOTE(review): tags 9 and 10 are not used by any field below. If they
          // belonged to since-deleted fields, consider adding `reserved 9, 10;`
          // to guard against accidental reuse -- TODO confirm.
    30  
    31    // The timestamp from the external system.
    32    // Used to determine if re-querying the external system is needed.
    33    google.protobuf.Timestamp external_update_time = 1;
    34  
    35    // LUCI project in the context of which this snapshot was saved.
    36    //
    37    // Since a CL isn't a resource of CV, CV can't infer whether a specific LUCI
    38    // project has access to a CL w/o re-querying Gerrit and effectively
    39    // recomputing the snapshot.
    40    string luci_project = 2;
    41  
    42    // Resolved dependencies of a CL.
    43    repeated Dep deps = 3;
    44  
    45    // Patchset is the incremental number of the latest patchset (aka revision).
    46    int32 patchset = 4;
    47    // MinEquivalentPatchset is the smallest and hence the earliest patchset
    48    // which is code-wise equivalent to the latest one.
    49    //
    50    // See gerrit.EquivalentPatchsetRange function for details.
    51    //
    52    // CV tracks this to determine which prior tryjobs can be re-used and which
    53    // can be canceled.
    54    int32 min_equivalent_patchset = 5;
    55  
    56    // If set, indicates problems while ingesting CL into CV, which ought to be
    57    // communicated back to user.
    58    repeated CLError errors = 6;
    59  
    60    // Outdated establishes conditions for refreshing Snapshot after CV mutations.
          //
          // It has no fields yet, so only the presence of the message carries meaning.
    61    message Outdated {
    62      // TODO(tandrii): support gating refresh on having refresher start no
    63      // earlier than some point in time, e.g. EVersion of this CL entity.
    64      // TODO(tandrii): support gating refresh on having minimum
    65      // external_update_time or equivalent.
    66    }
    67    // Outdated if set means Snapshot must be considered likely outdated due to
    68    // recent CV mutations.
    69    //
    70    // In particular, Project Manager does not act on the CLs with .Outdated set.
    71    Outdated outdated = 7;
    72  
    73    // Metadata is an ordered list of key-value pairs, which may later be
    74    // interpreted by CV guts.
    75    //
    76    // For example,
    77    //   [("No-Tree-Checks", "True"), ("NOTRY", "TRUE")].
    78    //
    79    // In case of Gerrit CLs, these are extracted from CL descriptions.
    80    // The Git-Footer-Style keys are normalized.
    81    // The values are stripped of leading and trailing whitespace.
    82    repeated StringPair metadata = 8;
    83  
    84    // CL-kind specific data.
    85    oneof kind {
            // Data specific to Gerrit CLs.
    86      Gerrit gerrit = 11;
    87    }
    88  }
    89  
        // DepKind tells how strictly a dependency must be honored relative to the
        // dependent CL.
    90  enum DepKind {
          // Zero value: the kind has not been specified.
    91    DEP_KIND_UNSPECIFIED = 0;
    92    // Dep MUST be patched in / submitted before the dependent CL.
    93    HARD = 1;
    94    // Dep SHOULD be patched in / submitted before the dependent CL,
    95    // but doesn't have to be.
    96    SOFT = 2;
    97  }
    98  
        // Dep is a resolved dependency of a CL: the CL depended upon and the kind of
        // the dependency.
    99  message Dep {
   100    // CLID is internal CV ID of a CL which is the dependency.
   101    int64 clid = 1;
          // Kind tells whether the dep is HARD or SOFT; see DepKind.
   102    DepKind kind = 2;
   103  }
   104  
        // Gerrit holds the Gerrit-specific part of a CL Snapshot.
   105  message Gerrit {
   106    // Gerrit host.
   107    string host = 5;
   108  
   109    // Info contains subset of ChangeInfo listed below.
   110    //
   111    // NOTE: keep this list in sync with RemoveUnusedGerritInfo() function.
   112    //  * number
   113    //  * owner
   114    //      * id
   115    //      * email (may be not set)
   116    //  * project
   117    //  * ref
   118    //  * status
   119    //  * current_revision
   120    //  * revisions
   121    //      * kind
   122    //      * number
   123    //      * ref
   124    //      * created
   125    //      * commit
   126    //          * id
   127    //          * parents
   128    //  * labels
   129    //      * optional
   130    //      * all (only if vote != 0)
   131    //          * user
   132    //              * id
   133    //              * email (may be not set)
   134    //      * value
   135    //  * messages
   136    //      * id
   137    //      * date
   138    //      * message
   139    //      * author
   140    //          * id
   141    //      * realauthor
   142    //          * id
   143    //  * updated
   144    //  * created
   145    gerrit.ChangeInfo info = 1;
   146  
   147    // Files are filenames touched in the current revision.
   148    //
   149    // It's derived from gerrit.ListFilesResponse, see
   150    // https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#list-files.
   151    repeated string files = 2;
   152  
   153    // Git dependencies of the current revision.
   154    repeated GerritGitDep git_deps = 3;
   155  
   156    // Free-form dependencies. Currently, sourced from CQ-Depend footers.
   157    // In the future, this may be derived from Gerrit hashtags, topics, or other
   158    // mechanisms.
   159    repeated GerritSoftDep soft_deps = 4;
   160  }
   161  
   162  // GerritGitDep is a dep discovered via Git child->parent chain of a Gerrit CL.
   163  message GerritGitDep {
   164    // Host is omitted because it's always the same as that of the CL.
   165  
   166    // Gerrit change number of the dependency.
   167    int64 change = 1;
   168  
   169    // Immediate is set iff this dep is an immediate parent of the Gerrit CL.
   170    //
   171    // Immediate dep must be submitted before its child.
   172    // Non-immediate CLs don't necessarily have to be submitted before:
   173    //   for example, for a chain <base> <- A1 <- B1 <- C1 <- D1
   174    //   D1's deps are [A,B,C] but only C is immediate, and 1 stands for patchset.
   175    //   Developer may then swap B,C without re-uploading D (say, to avoid
   176    //   patchset churn), resulting in a new logical chain:
   177    //      <base> <- A1 <- C2 <- B2
   178    //                   \
   179    //                    <- B1 <- C1 <- D1
   180    //
   181    //   In this case, Gerrit's related changes for D1 will still return A1,B1,C1,
   182    //   which CV interprets as C must be landed before D, while B and A should
   183    //   be landed before D.
   184    bool immediate = 2;
   185  }
   186  
        // GerritSoftDep is a free-form dependency on a Gerrit CL.
        //
        // Unlike GerritGitDep, it carries its own Gerrit host.
   187  message GerritSoftDep {
   188    // Gerrit host.
   189    string host = 1;
   190    // Gerrit change number.
   191    int64 change = 2;
   192  }
   193  
   194  // ApplicableConfig keeps track of configs applicable to a CL.
   195  //
   196  // This is computed based on known set of LUCI project configs, versions of
   197  // which are updated by CV independently, so the ApplicableConfig are also
   198  // eventually consistent.
   199  //
   200  // Normally, there is 1 applicable config = exactly 1 project with 1 config
   201  // group. If CL is no longer watched by CV, there will be 0 applicable configs.
   202  //
   203  // Sometimes, there can be 2+ applicable configs. This happens if either:
   204  //  * eventual consistency: responsibility for CL is moved from one LUCI project
   205  //    to another. There is no way to do this atomically, so CL may temporarily
   206  //    end up with 0 or 2 projects watching it, before settling on just 1.
   207  //  * misconfiguration: two projects or 2 different ConfigGroups within the same
   208  //    project watch the same CL.
   209  // In either case, CV refuses to guess and will abstain from processing such
   210  // CLs, but storing the list is very useful for CV debugging and potentially for
   211  // better diagnostic messages to CV users and LUCI project owners.
   212  message ApplicableConfig {
          // Project lists the ConfigGroups of one LUCI project which apply to the CL.
   213    message Project {
            // Name of the LUCI project.
   214      string name = 1;
   215      // IDs of the specific ConfigGroups. See cv/internal/config.ConfigGroupID.
   216      //
   217      // The referenced versions may no longer be available in datastore,
   218      // commonly happening if CL wasn't active for a long time.
   219      repeated string config_group_ids = 2;
   220    }
          // Projects with at least one applicable config; see the message comment
          // for when there can be 0 or 2+ of them.
          //
          // NOTE(review): no field uses tag 1. If it belonged to a removed field,
          // consider adding `reserved 1;` -- TODO confirm.
   221    repeated Project projects = 2;
   222  }
   223  
   224  // Access records which LUCI project can or can't see a CL.
   225  //
   226  // If a LUCI project has Access, it means both:
   227  //  (1) the project can read details of the CL (via Git/Gerrit ACLs);
   228  //  (2) the project is the only LUCI project watching this CL in CV
   229  //      (via the CV config).
   230  //      Note: there can still be several applicable ConfigGroups of the same
   231  //      project (see ApplicableConfig).
   232  //
   233  // In practice, .Access is set in 4 cases:
   234  //
   235  // (a) `CQ-Depend: host:number` Gerrit CL footers allow users to specify
   236  //     arbitrary dependencies, which typically happens due to typos,
   237  //     but malicious actors can try to get CL details of restricted projects.
   238  //     Either way, CV must not be a confused deputy here and must keep track
   239  //     which project can see what.
   240  //
   241  // (b) due to recent re-configuration of one or more LUCI projects, either
   242  //     in CV config and/or in Gerrit ACLs, the previously watched & readable CL
   243  //     becomes unwatched and/or unreadable.
   244  //
   245  // (c) a previously existing CL was deleted (e.g. by its owner or Gerrit
   246  //     administrators).
   247  //
   248  // (d) eventual consistency of Gerrit masquerading as HTTP 404 on stale replica,
   249  //     while quorum of replicas think CL actually exists and specific LUCI
   250  //     project having access to it.
   251  //
   252  // Unfortunately, (d) isn't easy to distinguish from (b) and (c), so CV resorts
   253  // to tracking time since CL became invisible -- the longer, the more likely it
   254  // is (b) or (c).
   255  //
   256  // Furthermore, in case of (a), iff CV knows nothing about specific Gerrit CL
   257  // identified as `CQ-Depend: host:change`, CV in general can't determine which
   258  // LUCI project is allowed to watch this CL *before* fetching Gerrit project
   259  // (repo) and target ref.
   260  //
   261  //
   262  // NOTE on CV as confused deputy.
   263  //
   264  // CV works with multiple LUCI projects. As of this writing (June 2021),
   265  // unfortunately, CV doesn't verify that Gerrit repos watched by a LUCI project
   266  // are in fact owned by that LUCI project. Thus, nothing prevents one LUCI
   267  // project from starting to watch repos de-facto owned by another LUCI project.
   268  // This in turn brings 2 problems:
   269  //
   270  // (1) Denial of service: unsolved.
   271  //     Mitigation: CV will refuse to work with CLs which are watched by more
   272  //     than 1 project. Since CV will communicate this by posting a message to
   273  //     the affected CL, this should be noticed and fixed quickly.
   274  //
   275  // (2) Information leaks: solved.
   276  //     Each LUCI project MUST use project-scoped service account (PSSA)
   277  //     (migration is under way, see https://crbug.com/824492).
   278  //     CV uses this account for all interaction with Gerrit on behalf of a
   279  //     specific LUCI project. Corresponding Gerrit repos:
   280  //       * SHOULD limit read access to its own PSSA + developers,
   281  //       * MUST limit Submit rights to its own PSSA and possibly developers.
   282  //
   283  // For example,
   284  //   * `infra` project has all its Gerrit CLs public and doesn't care about
   285  //      information leaks. All other LUCI projects can read its CLs, as well
   286  //      as the whole Internet.
   287  //   * `infra-internal` project protects its Gerrit CLs, making them visible
   288  //     to `infra-internal-scoped@...` account only.
   289  //     When CV queries Gerrit on `infra-internal` behalf, CV uses
   290  //     `infra-internal-scoped` account and can fetch the data.
   291  //   * Suppose malicious actor compromised `infra` repo, and placed a new CV
   292  //     config there to start watching CLs of the `infra-internal` project
   293  //     as well as super/secret/repo, which wasn't watched by any CV before.
   294  //       * Unfortunately, CV can't currently object to the new config.
   295  //       * However, when querying Gerrit on `infra` behalf, CV uses
   296  //         `infra-scoped@...` account, which presumably won't be configured with
   297  //         read access to either infra-internal or super/secret/repo.
   298  //       * So, corresponding CLs will have .Access entry recording that
   299  //         `infra` has no access to them.
   300  //       * NOTE: CLs of infra-internal will also have .ApplicableConfig with two
   301  //         projects there, which will prevent normal operation of
   302  //         `infra-internal` CV but will not cause any leaks.
   303  message Access {
          // Project records the access state of a single LUCI project.
   304    message Project {
   305      // Deprecated. Use no_access_time instead.
            //
            // Marked with the `deprecated` option so that generated code and tooling
            // surface the deprecation; the wire format is unaffected.
   306      bool no_access = 1 [deprecated = true];
   307      // The time when this was last re-confirmed.
   308      google.protobuf.Timestamp update_time = 2;
   309      // The time after which CV should consider lack of access stable.
   310      //
   311      // TODO(crbug/1216630): may be unset until backfill is done,
   312      // in which case use `no_access` field.
   313      google.protobuf.Timestamp no_access_time = 3;
   314    }
          // Access state per LUCI project; the map key identifies the project.
   315    map<string, Project> by_project = 1;
   316    // TODO(tandrii): per-project ApplicableConfig here.
   317  }
   318  
   319  // CLError encapsulates all kinds of CL errors, which ultimately result in
   320  // purging of the CL while communicating the reason to the relevant users.
   321  //
   322  // The primary goal of the CLError is to transport via CV guts sufficient
   323  // information to generate a clear user-friendly error message.
   324  message CLError {
   325    // Next tag is 12.
          //
          // At most one kind is set; it identifies what is wrong with the CL.
   326    oneof kind {
            // NOTE(review): presumably set when the CL owner has no email -- TODO
            // confirm against the code which produces this error.
   327      bool owner_lacks_email = 1;
            // See WatchedByManyConfigGroups.
   328      WatchedByManyConfigGroups watched_by_many_config_groups = 2;
            // See WatchedByManyProjects.
   329      WatchedByManyProjects watched_by_many_projects = 8;
            // See InvalidDeps.
   330      InvalidDeps invalid_deps = 3;
            // NOTE(review): presumably the name of the mode which isn't supported --
            // TODO confirm.
   331      string unsupported_mode = 4;
            // NOTE(review): presumably set when the CL's CQ-Depend footer refers to
            // the CL itself -- TODO confirm.
   332      bool self_cq_depend = 5;
            // NOTE(review): presumably describes what is corrupt in the metadata
            // received from Gerrit -- TODO confirm.
   333      string corrupt_gerrit_metadata = 6;
            // See ReusedTrigger.
   334      ReusedTrigger reused_trigger = 7;
   335      // Set to true when the footer "Commit: false" is present.
   336      bool commit_blocked = 9;
            // See TriggerDeps.
   337      TriggerDeps trigger_deps = 10;
   338      // Set with the CL ID of a failed Run, if the purge-requested-CL depends on
   339      // the CL.
   340      int64 dep_run_failed = 11;
   341    }
   342  
          // WatchedByManyConfigGroups lists the config groups watching the same CL.
   343    message WatchedByManyConfigGroups {
   344      // Config group names without LUCI project prefix.
   345      repeated string config_groups = 1;
   346    }
          // WatchedByManyProjects lists the projects watching the same CL.
   347    message WatchedByManyProjects {
            // NOTE(review): presumably LUCI project names -- TODO confirm.
   348      repeated string projects = 1;
   349    }
   350  
          // InvalidDeps lists deps of the CL which are invalid, grouped by the reason.
   351    message InvalidDeps {
   352      // Deps not watched by the same LUCI project as the dependent.
   353      repeated Dep unwatched = 1;
   354      // Deps watched by the same LUCI project but different config group.
   355      repeated Dep wrong_config_group = 2;
   356  
   357      // Not yet submitted deps of a full run in non-combinable mode.
   358      repeated Dep single_full_deps = 3;
   359      // Not yet CQ-ed deps of a Run in combinable mode.
   360      repeated Dep combinable_untriggered = 4;
   361      // CQ-ed deps of a different mode.
   362      repeated Dep combinable_mismatched_mode = 5;
   363  
            // TooMany reports a number of deps which exceeds the allowed maximum.
   364      message TooMany {
              // Actual number of deps.
   365        int32 actual = 1;
              // Maximum number of deps allowed.
   366        int32 max_allowed = 2;
   367      }
   368      // There are more non-submitted deps than is supported by CV.
   369      TooMany too_many = 6;
   370    }
   371  
   372    // ReusedTrigger means a CL trigger (e.g. CQ+1 vote) has already resulted in
   373    // a CQ Run which was finalized.
   374    //
   375    // Two known cases when this happens with a Gerrit CL:
   376    //  1. A user uploads a CL on ref A, then votes CQ+1.
   377    //     Before the Dry Run completes, the CL is moved to ref B, while
   378    //     preserving the CQ+1 vote.
   379    //     The old Run is finalized, but the new Run has the exact same trigger,
   380    //     which in CQDaemon-compatible mode means the new Run's ID is exactly the
   381    //     same as the old one, so CV can't create a new Run.
   382    //     TODO(crbug/1223349): after CQDaemon is deleted, the Run ID generation scheme
   383    //     can take into account the ref of a CL, and this use case can be allowed.
   384    //  2. The same as above but instead of moving CL between refs, abandon and
   385    //     restore the CL.
   386    message ReusedTrigger {
   387      // ID of the finalized Run.
   388      string run = 1;
   389    }
   390  
   391    // TriggerDeps indicates failures for triggering deps.
   392    message TriggerDeps {
            // PermissionDenied is a trigger attempt which failed due to permission
            // denied.
   393      message PermissionDenied {
              // ID of the CL on which the trigger attempt was made.
   394        int64 clid = 1;
   395        // If set, the vote was attempted on behalf of the Gerrit user.
   396        string email = 2;
   397      }
   398      // IDs of CLs for which trigger attempts failed due to permission denied.
   399      repeated PermissionDenied permission_denied = 1;
   400      // IDs of CLs for which trigger attempts failed because they were not found
   401      // in Gerrit.
   402      repeated int64 not_found = 2;
   403      // IDs of CLs for which trigger attempts failed due to internal Gerrit
   404      // error.
   405      repeated int64 internal_gerrit_error = 3;
   406    }
   407  }
   408  
   409  // CLUpdatedEvent is just a CL ID pinned to its latest known EVersion.
   410  message CLUpdatedEvent {
          // ID of the CL.
   411    int64 clid = 1;
          // Latest known EVersion of the CL.
   412    int64 eversion = 2;
   413  }
   414  
   415  // CLUpdatedEvents is a batch of CLUpdatedEvent messages.
   416  message CLUpdatedEvents {
          // The batched events.
   417    repeated CLUpdatedEvent events = 1;
   418  }
   419  
   420  // StringPair is a string key-value pair.
        //
        // Used for Snapshot.metadata.
   421  message StringPair {
          // Key of the pair.
   422    string key = 1;
          // Value of the pair.
   423    string value = 2;
   424  }