// Copyright 2020 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package cv.internal.changelist;

import "go.chromium.org/luci/common/proto/gerrit/gerrit.proto";
import "google/protobuf/timestamp.proto";

option go_package = "go.chromium.org/luci/cv/internal/changelist";

// Snapshot stores a snapshot of CL info as seen by CV at a certain time.
//
// When stored in CL entity, represents latest known Gerrit data.
// When stored in RunCL entity, represents data pertaining to a fixed patchset.
message Snapshot {
  // Next tag: 12.
  //
  // NOTE(review): tags 9 and 10 are not used by any field below. If they
  // belonged to deleted fields, consider adding `reserved 9, 10;` -- confirm
  // against the file history.

  // The timestamp from external system.
  // Used to determine if re-querying external system is needed.
  google.protobuf.Timestamp external_update_time = 1;

  // LUCI project in the context of which this snapshot was saved.
  //
  // Since a CL isn't a resource of CV, CV can't infer whether specific LUCI
  // project has access to a CL w/o re-querying Gerrit and effectively
  // recomputing the snapshot.
  string luci_project = 2;

  // Resolved dependencies of a CL.
  repeated Dep deps = 3;

  // Patchset is incremental number of the latest patchset (aka revision).
  int32 patchset = 4;

  // MinEquivalentPatchset is the smallest and hence the earliest patchset
  // which is code-wise equivalent to the latest one.
  //
  // See gerrit.EquivalentPatchsetRange function for details.
  //
  // CV tracks this to determine which prior tryjobs can be re-used and which
  // can be canceled.
  int32 min_equivalent_patchset = 5;

  // If set, indicates problems while ingesting CL into CV, which ought to be
  // communicated back to user.
  repeated CLError errors = 6;

  // Outdated establishes conditions for refreshing Snapshot after CV mutations.
  message Outdated {
    // TODO(tandrii): support gating refresh on having refresher start no
    // earlier than some point in time, e.g. EVersion of this CL entity.
    // TODO(tandrii): support gating refresh on having minimum
    // external_update_time or equivalent.
  }

  // Outdated if set means Snapshot must be considered likely outdated due to
  // recent CV mutations.
  //
  // In particular, Project Manager does not act on the CLs with .Outdated set.
  Outdated outdated = 7;

  // Metadata is an ordered list of key-value pairs, which may later be
  // interpreted by CV guts.
  //
  // For example,
  //   [("No-Tree-Checks", "True"), ("NOTRY", "TRUE")].
  //
  // In case of Gerrit CLs, these are extracted from CL descriptions.
  // The Git-Footer-Style keys are normalized.
  // The values are stripped of leading and trailing whitespace.
  repeated StringPair metadata = 8;

  // CL-kind specific data.
  oneof kind {
    // Data specific to Gerrit CLs.
    Gerrit gerrit = 11;
  }
}

// DepKind describes how strictly a dependency must precede the dependent CL.
enum DepKind {
  // Zero value: the kind is not specified.
  DEP_KIND_UNSPECIFIED = 0;
  // Dep MUST be patched in / submitted before the dependent CL.
  HARD = 1;
  // Dep SHOULD be patched in / submitted before the dependent CL,
  // but doesn't have to be.
  SOFT = 2;
}

// Dep is a single resolved dependency of a CL.
message Dep {
  // CLID is internal CV ID of a CL which is the dependency.
  int64 clid = 1;
  // Kind of the dependency; see DepKind.
  DepKind kind = 2;
}

// Gerrit holds the Gerrit-specific part of a CL Snapshot.
message Gerrit {
  // Gerrit host.
  string host = 5;

  // Info contains subset of ChangeInfo listed below.
  //
  // NOTE: keep this list in sync with RemoveUnusedGerritInfo() function.
  //  * number
  //  * owner
  //      * id
  //      * email (may be not set)
  //  * project
  //  * ref
  //  * status
  //  * current_revision
  //  * revisions
  //      * kind
  //      * number
  //      * ref
  //      * created
  //      * commit
  //          * id
  //          * parents
  //  * labels
  //      * optional
  //      * all (only if vote != 0)
  //          * user
  //              * id
  //              * email (may be not set)
  //          * value
  //  * messages
  //      * id
  //      * date
  //      * message
  //      * author
  //          * id
  //      * realauthor
  //          * id
  //  * updated
  //  * created
  gerrit.ChangeInfo info = 1;

  // Files are filenames touched in the current revision.
  //
  // It's derived from gerrit.ListFilesResponse, see
  // https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#list-files.
  repeated string files = 2;

  // Git dependencies of the current revision.
  repeated GerritGitDep git_deps = 3;

  // Free-form dependencies. Currently, sourced from CQ-Depend footers.
  // In the future, this may be derived from Gerrit hashtags, topics, or other
  // mechanisms.
  repeated GerritSoftDep soft_deps = 4;
}

// GerritGitDep is a dependency discovered via Git child->parent chain for one
// Gerrit CL.
message GerritGitDep {
  // Host is omitted because it's always the same as that of the CL.

  // Gerrit Change number.
  int64 change = 1;

  // Immediate is set iff this dep is an immediate parent of the Gerrit CL.
  //
  // Immediate dep must be submitted before its child.
  // Non-immediate CLs don't necessarily have to be submitted before:
  //   for example, for a chain <base> <- A1 <- B1 <- C1 <- D1
  //   D1's deps are [A,B,C] but only C is immediate, and 1 stands for patchset.
  //   Developer may then swap B,C without re-uploading D (say, to avoid
  //   patchset churn), resulting in a new logical chain:
  //      <base> <- A1 <- C2 <- B2
  //                   \
  //                    <- B1 <- C1 <- D1
  //
  // In this case, Gerrit's related changes for D1 will still return A1,B1,C1,
  // which CV interprets as C must be landed before D, while B and A should
  // be landed before D.
  bool immediate = 2;
}

// GerritSoftDep is a free-form dependency on a Gerrit change, possibly on a
// different Gerrit host; see Gerrit.soft_deps.
message GerritSoftDep {
  // Gerrit host.
  string host = 1;
  // Gerrit change number.
  int64 change = 2;
}

// ApplicableConfig keeps track of configs applicable to a CL.
//
// This is computed based on known set of LUCI project configs, versions of
// which are updated by CV independently, so the ApplicableConfig are also
// eventually consistent.
//
// Normally, there is 1 applicable config = exactly 1 project with 1 config
// group. If CL is no longer watched by CV, there will be 0 applicable configs.
//
// Sometimes, there can be 2+ applicable configs. This happens if either:
//  * eventual consistency: responsibility for CL is moved from one LUCI project
//    to another. There is no way to do this atomically, so CL may temporarily
//    end up with 0 or 2 projects watching it, before settling on just 1.
//  * misconfiguration: two projects or 2 different ConfigGroups within the same
//    project watch the same CL.
// In either case, CV refuses to guess and will abstain from processing such
// CLs, but storing the list is very useful for CV debugging and potentially for
// better diagnostic messages to CV users and LUCI project owners.
message ApplicableConfig {
  // Project lists the applicable ConfigGroups of one LUCI project.
  message Project {
    // Name of the LUCI project.
    string name = 1;
    // IDs of the specific ConfigGroups. See cv/internal/config.ConfigGroupID.
    //
    // The referenced version may no longer be available to datastore,
    // commonly happening if CL wasn't active for a long time.
    repeated string config_group_ids = 2;
  }

  // Projects whose configs are applicable to the CL.
  //
  // NOTE(review): tag 1 is not used in this message. If it belonged to a
  // deleted field, consider adding `reserved 1;` -- confirm against the file
  // history.
  repeated Project projects = 2;
}

// Access records which LUCI project can or can't see a CL.
//
// If a LUCI project has Access, it means both:
//  (1) the project can read details of the CL (via Git/Gerrit ACLs);
//  (2) the project is the only LUCI project watching this CL in CV
//      (via the CV config).
//      Note: there can still be several applicable ConfigGroups of the same
//      project (see ApplicableConfig).
//
// In practice, .Access is set in 4 cases:
//
// (a) `CQ-Depend: host:number` Gerrit CL footers allow users to specify
//     arbitrary dependencies, which typically happens due to typos,
//     but malicious actors can try to get CL details of restricted projects.
//     Either way, CV must not be a confused deputy here and must keep track
//     which project can see what.
//
// (b) due to recent re-configuration of one or more LUCI projects, either
//     in CV config and/or in Gerrit ACLs, the previously watched & readable CL
//     becomes unwatched and/or unreadable.
//
// (c) a previously existing CL was deleted (e.g. by its owner or Gerrit
//     administrators).
//
// (d) eventual consistency of Gerrit masquerading as HTTP 404 on stale replica,
//     while quorum of replicas think CL actually exists and specific LUCI
//     project has access to it.
//
// Unfortunately, (d) isn't easy to distinguish from (b) and (c), so CV resorts
// to tracking time since CL became invisible -- the longer, the more likely it
// is (b) or (c).
//
// Furthermore, in case of (a), iff CV knows nothing about specific Gerrit CL
// identified as `CQ-Depend: host:change`, CV in general can't determine which
// LUCI project is allowed to watch this CL *before* fetching Gerrit project
// (repo) and target ref.
//
//
// NOTE on CV as confused deputy.
//
// CV works with multiple LUCI projects. As of this writing (June 2021),
// unfortunately, CV doesn't verify that Gerrit repos watched by a LUCI project
// are in fact owned by that LUCI project. Thus, nothing prevents one LUCI
// project from starting to watch repos de-facto owned by another LUCI project.
// This in turn brings 2 problems:
//
// (1) Denial of service: unsolved.
//     Mitigation: CV will refuse to work with CLs which are watched by more
//     than 1 project. Since CV will communicate by posting message to affected
//     CL, this should be noticed and fixed quickly.
//
// (2) Information leaks: solved.
//     Each LUCI project MUST use project-scoped service account (PSSA)
//     (migration is under way, see https://crbug.com/824492).
//     CV uses this account for all interaction with Gerrit on behalf of a
//     specific LUCI project. Corresponding Gerrit repos:
//       * SHOULD limit read access to its own PSSA + developers,
//       * MUST limit Submit rights to its own PSSA and possibly developers.
//
//     For example,
//       * `infra` project has all its Gerrit CLs public and doesn't care about
//         information leaks. All other LUCI projects can read its CLs, as well
//         as the whole Internet.
//       * `infra-internal` project protects its Gerrit CLs, making them visible
//         to `infra-internal-scoped@...` account only.
//         When CV queries Gerrit on `infra-internal` behalf, CV uses
//         `infra-internal-scoped` account and can fetch the data.
//       * Suppose malicious actor compromised `infra` repo, and placed a new CV
//         config there to start watching CLs of the `infra-internal` project
//         as well as super/secret/repo, which wasn't watched by any CV before.
//       * Unfortunately, CV can't currently object to the new config.
//       * However, when querying Gerrit on `infra` behalf, CV uses
//         `infra-scoped@...` account, which presumably won't be configured with
//         read access to either infra-internal or super/secret/repo.
//       * So, corresponding CLs will have .Access entry recording that
//         `infra` has no access to them.
//       * NOTE: CLs of infra-internal will also have .ApplicableConfig with two
//         projects there, which will prevent normal operation of
//         `infra-internal` CV but will not cause any leaks.
message Access {
  // Project records the (lack of) access of a single LUCI project to the CL.
  message Project {
    // Deprecated. Use no_access_time instead.
    bool no_access = 1;
    // The time when this was last re-confirmed.
    google.protobuf.Timestamp update_time = 2;
    // The time after which CV should consider lack of access stable.
    //
    // TODO(crbug/1216630): may be unset until backfill is done,
    // in which case use `no_access` field.
    google.protobuf.Timestamp no_access_time = 3;
  }

  // Access records per project.
  //
  // NOTE(review): keys are presumably LUCI project names -- confirm against
  // the code populating this map.
  map<string, Project> by_project = 1;
  // TODO(tandrii): per-project ApplicableConfig here.
}

// CLError encapsulates all kinds of CL errors, which ultimately result in
// purging of the CL while communicating the reason to the relevant users.
//
// The primary goal of the CLError is to transport via CV guts sufficient
// information to generate a clear user-friendly error message.
message CLError {
  // Next tag is 12.

  // The specific error; being a oneof, at most one member is set.
  oneof kind {
    bool owner_lacks_email = 1;
    WatchedByManyConfigGroups watched_by_many_config_groups = 2;
    WatchedByManyProjects watched_by_many_projects = 8;
    InvalidDeps invalid_deps = 3;
    string unsupported_mode = 4;
    bool self_cq_depend = 5;
    string corrupt_gerrit_metadata = 6;
    ReusedTrigger reused_trigger = 7;
    // Set to true when the footer "Commit: false" is present.
    bool commit_blocked = 9;
    TriggerDeps trigger_deps = 10;
    // Set with the CL ID of a failed Run, if the purge-requested-CL depends on
    // the CL.
    int64 dep_run_failed = 11;
  }

  // WatchedByManyConfigGroups is the payload of the
  // watched_by_many_config_groups error.
  message WatchedByManyConfigGroups {
    // Config group names without LUCI project prefix.
    repeated string config_groups = 1;
  }

  // WatchedByManyProjects is the payload of the watched_by_many_projects
  // error.
  message WatchedByManyProjects {
    // NOTE(review): presumably the names of the LUCI projects watching the
    // CL -- confirm against the code populating this field.
    repeated string projects = 1;
  }

  // InvalidDeps groups the deps of a CL by the reason they are invalid.
  message InvalidDeps {
    // Deps not watched by the same LUCI project as the dependent.
    repeated Dep unwatched = 1;
    // Deps watched by the same LUCI project but different config group.
    repeated Dep wrong_config_group = 2;

    // Not yet submitted deps of a full run in non-combinable mode.
    repeated Dep single_full_deps = 3;
    // Not yet CQ-ed deps of a Run in combinable mode.
    repeated Dep combinable_untriggered = 4;
    // CQ-ed deps of a different mode.
    repeated Dep combinable_mismatched_mode = 5;

    // TooMany carries the counts reported by the too_many error.
    message TooMany {
      // NOTE(review): presumably the number of non-submitted deps found --
      // confirm.
      int32 actual = 1;
      // NOTE(review): presumably the largest number of non-submitted deps
      // supported by CV -- confirm.
      int32 max_allowed = 2;
    }
    // There are more non-submitted deps than is supported by CV.
    TooMany too_many = 6;
  }

  // ReusedTrigger means a CL trigger (e.g. CQ+1 vote) has already resulted in
  // a CQ Run which was finalized.
  //
  // Two known cases when this happens with a Gerrit CL:
  //  1. A user uploads a CL on ref A, then votes CQ+1.
  //     Before the Dry Run completes, the CL is moved to ref B, while
  //     preserving the CQ+1 vote.
  //     The old Run is finalized, but the new Run has the exact same trigger,
  //     which in CQDaemon-compatible mode means the new Run's ID is exactly the
  //     same as the old one, so CV can't create a new Run.
  //     TODO(crbug/1223349): after CQDaemon is deleted, the Run ID generation
  //     scheme can take into account the ref of a CL, and this use case can be
  //     allowed.
  //  2. The same as above but instead of moving CL between refs, abandon and
  //     restore the CL.
  message ReusedTrigger {
    // ID of the finalized Run.
    string run = 1;
  }

  // TriggerDeps indicates failures for triggering deps.
  message TriggerDeps {
    // PermissionDenied describes one trigger attempt rejected with
    // permission denied.
    message PermissionDenied {
      // ID of the CL for which the trigger attempt was denied.
      int64 clid = 1;
      // If set, the vote was attempted on behalf of the Gerrit user.
      string email = 2;
    }
    // IDs of CLs for which trigger attempts failed due to permission denied.
    repeated PermissionDenied permission_denied = 1;
    // IDs of CLs for which trigger attempts failed because they were not found
    // in Gerrit.
    repeated int64 not_found = 2;
    // IDs of CLs for which trigger attempts failed due to internal Gerrit
    // error.
    repeated int64 internal_gerrit_error = 3;
  }
}

// CLUpdatedEvent is just a CL ID pinned to its latest known EVersion.
message CLUpdatedEvent {
  // ID of the CL.
  int64 clid = 1;
  // Latest known EVersion of the CL.
  int64 eversion = 2;
}

// CLUpdatedEvents is a batch of CLUpdatedEvents.
message CLUpdatedEvents {
  // The batched events.
  repeated CLUpdatedEvent events = 1;
}

// A string key-value pair.
message StringPair {
  // Key of the pair.
  string key = 1;
  // Value of the pair.
  string value = 2;
}