go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/span/init_db.sql (about) 1 -- Copyright 2022 The LUCI Authors. 2 -- 3 -- Licensed under the Apache License, Version 2.0 (the "License"); 4 -- you may not use this file except in compliance with the License. 5 -- You may obtain a copy of the License at 6 -- 7 -- http://www.apache.org/licenses/LICENSE-2.0 8 -- 9 -- Unless required by applicable law or agreed to in writing, software 10 -- distributed under the License is distributed on an "AS IS" BASIS, 11 -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 -- See the License for the specific language governing permissions and 13 -- limitations under the License. 14 15 -------------------------------------------------------------------------------- 16 -- This script initializes a LUCI Analysis Spanner database. 17 18 -- FailureAssociationRules associate failures with bugs. When a rule 19 -- is used to match incoming test failures, the resultant cluster is 20 -- known as a 'bug cluster' because the failures in it are associated 21 -- with a bug (via the failure association rule). 22 -- The ID of a bug cluster corresponding to a rule is 23 -- (Project, RuleBasedClusteringAlgorithm, RuleID), where 24 -- RuleBasedClusteringAlgorithm is the algorithm name of the algorithm 25 -- that clusters failures based on failure association rules (e.g. 26 -- 'rules-v2'), and (Project, RuleId) is the ID of the rule. 27 CREATE TABLE FailureAssociationRules ( 28 -- Identity fields. 29 30 -- The LUCI Project this bug belongs to. 31 Project STRING(40) NOT NULL, 32 -- The unique identifier for the rule. This is a randomly generated 33 -- 128-bit ID, encoded as 32 lowercase hexadecimal characters. 34 RuleId STRING(32) NOT NULL, 35 36 -- Failure predicate field (which failures are matched by the rule). 37 38 -- The rule predicate, defining which failures are being associated. 39 RuleDefinition STRING(65536) NOT NULL, 40 -- Whether the bug must still be updated by LUCI Analysis, and whether 41 -- failures should still be matched against this rule. The only allowed 42 -- values are true or NULL (to indicate false). Only if the bug has 43 -- been closed and no failures have been observed for a while should 44 -- this be NULL. This makes it easy to retrofit a NULL_FILTERED index 45 -- in future, if it is needed for performance. 46 IsActive BOOL, 47 -- The time the rule was last updated in a way that caused the 48 -- matched failures to change, i.e. because of a change to RuleDefinition 49 -- or IsActive. (For comparison, updating BugID does NOT change 50 -- the matched failures, so does NOT update this field.) 51 -- When this value changes, it triggers re-clustering. 52 -- Basis for RulesVersion on ClusteringState and ReclusteringRuns. 53 PredicateLastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 54 55 -- Bug fields. 56 57 -- The bug the failures are associated with (part 1). This is the 58 -- bug tracking system containing the bug the failures are associated 59 -- with. The only supported values are 'monorail' and 'buganizer'. 60 BugSystem STRING(16) NOT NULL, 61 -- The bug the failures are associated with (part 2). This is the 62 -- identifier of the bug the failures are associated with, as identified 63 -- by the bug tracking system itself. For monorail, the scheme is 64 -- {project}/{numeric_id}, for buganizer, the scheme is {numeric_id}. 65 BugId STRING(255) NOT NULL, 66 -- Whether this rule should manage the priority and verified status 67 -- of the associated bug based on the impact of the cluster defined 68 -- by this rule. 69 -- The only allowed values are true or NULL (to indicate false). 70 IsManagingBug BOOL, 71 -- Indicates whether we should update the priority of the bug 72 -- that is associated with this rule based on the cluster's impact. 73 -- This toggle has must have no effect if the IsManagingBug toggle is off. 74 -- This value should not be null. 75 -- This column has a default value of TRUE in prod, but it is commented 76 -- out here because spanner emulator doesn't support DEFAULT 77 -- expressions, see https://github.com/GoogleCloudPlatform/cloud-spanner-emulator/issues/71. 78 IsManagingBugPriority BOOL NOT NULL, -- DEFAULT TRUE, 79 -- Tracks when the field IsManagingBugPriority was last updated. 80 IsManagingBugPriorityLastUpdated TIMESTAMP OPTIONS (allow_commit_timestamp=true), 81 82 -- Immutable data. 83 84 -- The suggested cluster this failure association rule was created from 85 -- (if any) (part 1). 86 -- This is the algorithm component of the suggested cluster this rule 87 -- was created from. 88 -- Until re-clustering is complete (and the residual impact of the source 89 -- cluster has reduced to zero), SourceClusterAlgorithm and SourceClusterId 90 -- tell bug filing to ignore the source suggested cluster when 91 -- determining whether new bugs need to be filed. 92 SourceClusterAlgorithm STRING(32) NOT NULL, 93 -- The suggested cluster this failure association rule was created from 94 -- (if any) (part 2). 95 -- This is the algorithm-specific ID component of the suggested cluster 96 -- this rule was created from. 97 SourceClusterId STRING(32) NOT NULL, 98 99 -- System-controlled data (not user modifiable). 100 101 -- Serialized and compressed luci.analysis.internal.bugs.BugManagementState 102 -- proto. State used to control automatic bug management. 103 BugManagementState BYTES(MAX), 104 105 -- Audit fields. 106 107 -- The time the rule was created. 108 CreationTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 109 -- The user which created the rule. If this was auto-filed by LUCI Analysis 110 -- itself, this is the special value 'system'. Otherwise, it is 111 -- an email address. 112 -- 320 is the maximum length of an email address (64 for local part, 113 -- 1 for the '@', and 255 for the domain part). 114 -- A cron job automatically deletes this data after 30 days. 115 CreationUser STRING(320) NOT NULL, 116 -- The last time an auditable field was updated. An auditable field 117 -- is any field other than a system controlled data field. 118 LastAuditableUpdate TIMESTAMP OPTIONS (allow_commit_timestamp=true), 119 -- The last user who updated an auditable field. An auditable field 120 -- is any field other than a system controlled data field. 121 -- If this was LUCI Analysis itself, 122 -- (e.g. in case of an auto-filed bug which was created and never 123 -- modified) this is 'system'. Otherwise, it is an email address. 124 -- A cron job automatically deletes this data after 30 days. 125 LastAuditableUpdateUser STRING(320), 126 -- The last time the rule was updated. 127 LastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 128 ) PRIMARY KEY (Project, RuleId); 129 130 -- The failure association rules associated with a bug. This also 131 -- enforces the constraint that there is at most one rule per bug 132 -- per project. 133 CREATE UNIQUE INDEX FailureAssociationRuleByBugAndProject ON FailureAssociationRules(BugSystem, BugId, Project); 134 135 -- Enforces the constraint that only one rule may manage a given bug 136 -- at once. 137 -- This is required to ensure that automatic bug filing does not attempt to 138 -- take conflicting actions (i.e. simultaneously increase and decrease 139 -- priority) on the same bug, because of differing priorities set by 140 -- different rules. 141 CREATE UNIQUE NULL_FILTERED INDEX FailureAssociationRuleByManagedBug ON FailureAssociationRules(BugSystem, BugId, IsManagingBug); 142 143 -- Clustering state records the clustering state of failed test results, organised 144 -- by chunk. 145 CREATE TABLE ClusteringState ( 146 -- The LUCI Project the test results belong to. 147 Project STRING(40) NOT NULL, 148 -- The identity of the chunk of test results. 32 lowercase hexadecimal 149 -- characters assigned by the ingestion process. 150 ChunkId STRING(32) NOT NULL, 151 -- The start of the retention period of the test results in the chunk. 152 PartitionTime TIMESTAMP NOT NULL, 153 -- The identity of the blob storing the chunk's test results. 154 ObjectId STRING(32) NOT NULL, 155 -- The version of clustering algorithms used to cluster test results in this 156 -- chunk. (This is a version over the set of algorithms, distinct from the 157 -- version of a single algorithm, e.g.: 158 -- v1 -> {reason-v1}, v2 -> {reason-v1, testname-v1}, 159 -- v3 -> {reason-v2, testname-v1}.) 160 AlgorithmsVersion INT64 NOT NULL, 161 -- The version of project configuration used by algorithms to match test 162 -- results in this chunk. 163 ConfigVersion TIMESTAMP NOT NULL, 164 -- The version of the set of failure association rules used to match test 165 -- results in this chunk. This is the maximum "Predicate Last Updated" time 166 -- of any failure association rule in the snapshot of failure association 167 -- rules used to match the test results. 168 RulesVersion TIMESTAMP NOT NULL, 169 -- Serialized ChunkClusters proto containing which test result is in which 170 -- cluster. 171 Clusters BYTES(MAX) NOT NULL, 172 -- The Spanner commit timestamp of when the row was last updated. 173 LastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 174 ) PRIMARY KEY (Project, ChunkId) 175 , ROW DELETION POLICY (OLDER_THAN(PartitionTime, INTERVAL 90 DAY)); 176 177 -- ReclusteringRuns contains details of runs used to re-cluster test results. 178 CREATE TABLE ReclusteringRuns ( 179 -- The LUCI Project. 180 Project STRING(40) NOT NULL, 181 -- The attempt. This is the timestamp the orchestrator run ends. 182 AttemptTimestamp TIMESTAMP NOT NULL, 183 -- The minimum algorithms version the reclustering run is trying to achieve. 184 -- Chunks with an AlgorithmsVersion less than this value are eligible to be 185 -- re-clustered. 186 AlgorithmsVersion INT64 NOT NULL, 187 -- The minimum config version the reclustering run is trying to achieve. 188 -- Chunks with a ConfigVersion less than this value are eligible to be 189 -- re-clustered. 190 ConfigVersion TIMESTAMP NOT NULL, 191 -- The minimum rules version the reclustering run is trying to achieve. 192 -- Chunks with a RulesVersion less than this value are eligible to be 193 -- re-clustered. 194 RulesVersion TIMESTAMP NOT NULL, 195 -- The number of shards created for this run (for this LUCI project). 196 ShardCount INT64 NOT NULL, 197 -- The number of shards that have reported progress (at least once). 198 -- When this is equal to ShardCount, readers can have confidence Progress 199 -- is a reasonable reflection of the progress made reclustering 200 -- this project. Until then, it is a loose lower-bound. 201 ShardsReported INT64 NOT NULL, 202 -- The progress. This is a value between 0 and 1000*ShardCount. 203 Progress INT64 NOT NULL, 204 ) PRIMARY KEY (Project, AttemptTimestamp DESC) 205 , ROW DELETION POLICY (OLDER_THAN(AttemptTimestamp, INTERVAL 90 DAY)); 206 207 -- ReclusteringShards is used to for shards to report progress re-clustering 208 -- test results. Generally only progress for the most recent minute will 209 -- be in this table. 210 CREATE TABLE ReclusteringShards ( 211 -- A unique number for the shard. Shards are numbered sequentially, 212 -- starting from one. 213 ShardNumber INT64 NOT NULL, 214 -- The attempt. This is the timestamp the orchestrator run ends. 215 AttemptTimestamp TIMESTAMP NOT NULL, 216 -- The LUCI Project the shard is doing reclustering for. 217 Project STRING(40) NOT NULL, 218 -- The progress. This is a value between 0 and 1000. If this is NULL, 219 -- it means progress has not yet been reported by the shard. 220 Progress INT64, 221 ) PRIMARY KEY (ShardNumber, AttemptTimestamp DESC); 222 223 -- Ingestions is used to synchronise and deduplicate the ingestion 224 -- of test results which require data from one or more sources. 225 -- 226 -- Ingestion may only start after the following events are received: 227 -- 1. The build has completed. 228 -- 2. The invocation containing its test results (if any) 229 -- has been finalized. 230 -- 3. The presubmit run (if any) has completed. 231 -- These events may occur in any order (e.g. 3 can occur before 1 if the 232 -- presubmit run fails before all builds are complete). 233 CREATE TABLE Ingestions ( 234 -- The unique key for the ingestion. The current scheme is: 235 -- {buildbucket host name}/{build id}. 236 BuildId STRING(1024) NOT NULL, 237 -- The LUCI Project to which the build belongs. Populated at the same 238 -- time as the build result. 239 BuildProject STRING(40), 240 -- The build result. 241 BuildResult BYTES(MAX), 242 -- The Spanner commit time the build result was populated. 243 BuildJoinedTime TIMESTAMP OPTIONS (allow_commit_timestamp=true), 244 -- Does the build have a ResultDB invocation? If yes, then ingestion should 245 -- wait for the invocation result to be populated before commencing ingestion. 246 -- (In practice, ingestion of a build without an invocation does nothing, but 247 -- we schedule an ingestion for it anyway as for monitoring purposes it is 248 -- convenient if all builds yield an ingestion task.) 249 -- Only populated once either the BuildResult or InvocationResult has been set. 250 HasInvocation BOOL, 251 -- The LUCI Project to which the invocation belongs. Populated at the same 252 -- time as the invocation result. 253 InvocationProject STRING(40), 254 -- The invocation result. 255 InvocationResult BYTES(MAX), 256 -- The Spanner commit time the invocation result was populated. 257 InvocationJoinedTime TIMESTAMP OPTIONS (allow_commit_timestamp=true), 258 -- Is the build part of a presubmit run? If yes, then ingestion should 259 -- wait for the presubmit result to be populated before commencing ingestion. 260 -- Use 'true' to indicate true and NULL to indicate false. 261 -- Only populated once either the BuildResult or PresubmitResult has been set. 262 IsPresubmit BOOL, 263 -- The LUCI Project to which the presubmit run belongs. Populated at the 264 -- same time as the presubmit run result. 265 PresubmitProject STRING(40), 266 -- The presubmit result. 267 PresubmitResult BYTES(MAX), 268 -- The Spanner commit time the presubmit result was populated. 269 PresubmitJoinedTime TIMESTAMP OPTIONS (allow_commit_timestamp=true), 270 -- The Spanner commit time the row last last updated. 271 LastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 272 -- The number of test result ingestion tasks have been created for this 273 -- invocation. 274 -- Used to avoid duplicate scheduling of ingestion tasks. If the page_index 275 -- is the index of the page being processed, an ingestion task for the next 276 -- page will only be created if (page_index + 1) == TaskCount. 277 TaskCount INT64, 278 ) PRIMARY KEY (BuildId) 279 -- 90 days retention, plus some margin (10 days) to ensure ingestion records 280 -- are always retained longer than the ingested results (acknowledging 281 -- the partition time on ingested chunks may be later than the LastUpdated 282 -- time if clocks are not synchronised). 283 , ROW DELETION POLICY (OLDER_THAN(LastUpdated, INTERVAL 100 DAY)); 284 285 -- Stores transactional tasks reminders. 286 -- See https://go.chromium.org/luci/server/tq. Scanned by tq-sweeper-spanner. 287 CREATE TABLE TQReminders ( 288 ID STRING(MAX) NOT NULL, 289 FreshUntil TIMESTAMP NOT NULL, 290 Payload BYTES(102400) NOT NULL, 291 ) PRIMARY KEY (ID ASC); 292 293 CREATE TABLE TQLeases ( 294 SectionID STRING(MAX) NOT NULL, 295 LeaseID INT64 NOT NULL, 296 SerializedParts ARRAY<STRING(MAX)>, 297 ExpiresAt TIMESTAMP NOT NULL, 298 ) PRIMARY KEY (SectionID ASC, LeaseID ASC); 299 300 -- Stores test results. 301 -- As of Q2 2022, this table is estimated to collect ~250 billion rows over 302 -- 90 days. Please be mindful of storage implications when adding new fields. 303 -- https://cloud.google.com/spanner/docs/reference/standard-sql/data-types#storage_size_for_data_types 304 -- gives guidance on the storage sizes of data types. 305 CREATE TABLE TestResults ( 306 -- The LUCI Project this test result belongs to. 307 Project STRING(40) NOT NULL, 308 309 -- Unique identifier of the test. 310 -- This has the same value as luci.resultdb.v1.TestResult.test_id. 311 TestId STRING(MAX) NOT NULL, 312 313 -- Partition time, as determined by LUCI Analysis ingestion. 314 -- Start time of the ingested build (for postsubmit results) or start time 315 -- of the presubmit run (for presubmit results). Defines date/time axis of 316 -- test results plotted by date/time, and start of data retention period. 317 -- Including as part of Primary Key allows direct filtering of data for test 318 -- to last N days. This could be used to improve performance for tests with 319 -- many results, or allow experimentation with keeping longer histories 320 -- (e.g. 120 days) without incurring performance penalty on time-windowed 321 -- queries. 322 PartitionTime TIMESTAMP NOT NULL, 323 324 -- A hex-encoded sha256 of concatenated "<key>:<value>\n" variant pairs. 325 -- Computed as hex(sha256(<concatenated_key_value_pairs>)[:8]), 326 -- where concatenated_key_value_pairs is the result of concatenating 327 -- variant pairs formatted as "<key>:<value>\n" in ascending key order. 328 -- Combination of Realm, TestId and VariantHash can identify a test variant. 329 VariantHash STRING(16) NOT NULL, 330 331 -- The invocation from which these test results were ingested. 332 -- This is the top-level invocation that was ingested. 333 IngestedInvocationId STRING(MAX) NOT NULL, 334 335 -- The index of the test run that contained this test result. 336 -- The test run of a test result is the invocation it is directly 337 -- included inside; typically the invocation for the swarming task 338 -- the tests ran as part of. 339 -- Indexes are assigned to runs based on the order they appear to have 340 -- run in, starting from zero (based on test result timestamps). 341 -- However, if two test runs overlap, the order of indexes for those test 342 -- runs is not guaranteed. 343 RunIndex INT64 NOT NULL, 344 345 -- The index of the test result in the run. The first test result that 346 -- was produced in a run will have index 0, the second will have index 1, 347 -- and so on. 348 ResultIndex INT64 NOT NULL, 349 350 -- Whether the test result was expected. 351 -- The value 'true' is used to encode true, and NULL encodes false. 352 IsUnexpected BOOL, 353 354 -- How long the test execution took, in microseconds. 355 RunDurationUsec INT64, 356 357 -- The test result status. 358 Status INT64 NOT NULL, 359 360 -- The reasons (if any) the test verdict was exonerated. 361 -- If this array is null, the test verdict was not exonerated. 362 -- (Non-null) empty array values are not used. 363 -- This field is stored denormalised. It is guaranteed to be the same for 364 -- all results for a test variant in an ingested invocation. 365 ExonerationReasons ARRAY<INT64>, 366 367 -- The following data from the invocation that is stored denormalised. 368 -- It is guaranteed to be the same for all results for the ingested invocation. 369 370 -- The realm of the test result, excluding project. 62 as ResultDB allows 371 -- at most 64 characters for the construction "<project>:<realm>" and project 372 -- must be at least one character. 373 SubRealm STRING(62) NOT NULL, 374 375 -- Indicate if this test result is from LUCI Bisection run. 376 -- Test results from LUCI Bisection will have an option to be filtered out 377 -- from test history endpoints so that they do not show in the test history page. 378 -- The value 'true' is used to encode true, and NULL encodes false. 379 IsFromBisection BOOL, 380 381 -- The following data from the test verdict stored denormalised. It is guaranteed 382 -- to be the same for all results for the same test verdict. 383 384 -- The identity of the source reference (e.g. git reference / branch) that was tested. 385 -- See go.chromium.org/luci/analysis/pbutil.SourceRefHash for details. 386 SourceRefHash BYTES(8), 387 388 -- The position along the given source reference that was tested. 389 -- This excludes any unsubmitted changes that were tested, which are 390 -- noted separately in the Changelist... fields below. 391 SourcePosition INT64, 392 393 -- The following fields capture information about any unsubmitted 394 -- changelists that were tested by the test execution. The arrays 395 -- are matched in length and correspond in index, i.e. 396 -- ChangelistHosts[OFFSET(0)] corresponds with ChangelistChanges[OFFSET(0)] 397 -- and ChangelistPatchsets[OFFSET(0)], 398 -- with the exception of ChangelistOwnerKinds, for which correspondance 399 -- is not guaranteed until March 2023 (as the column was retrofitted later). 400 -- 401 -- Changelists are stored in ascending lexicographical order (over 402 -- (hostname, change, patchset)). 403 -- They will be set for all presubmit runs, and may be set for other 404 -- builds as well (even those outside a formal LUCI CV run) based on 405 -- buildbucket inputs. At most 10 changelists are included. 406 407 -- Hostname(s) of the gerrit instance of the changelist that was tested 408 -- (if any). For storage efficiency, the suffix "-review.googlesource.com" 409 -- is not stored if it is present. When reading, if the value read 410 -- does not contain dots ('.'), the suffix should be added back. 411 -- Otherwise, the value can be assumed to be complete. 412 ChangelistHosts ARRAY<STRING(255)> NOT NULL, 413 414 -- The changelist number(s), e.g. 12345. 415 ChangelistChanges ARRAY<INT64> NOT NULL, 416 417 -- The patchset number(s) of the changelist, e.g. 1. 418 ChangelistPatchsets ARRAY<INT64> NOT NULL, 419 420 -- The changelist owner kind(s). Elements in this array correspond to 421 -- one of the luci.analysis.v1.ChangelistOwnerKinds values. 422 -- 'U' corresponds to a User changelist, 'A' corresponds to an Automation 423 -- changelist, and '' corresponds to a changelist of unspecified origin. 424 ChangelistOwnerKinds ARRAY<STRING(1)>, 425 426 -- Whether there were any changes made to the sources, not described above. 427 -- For example, a version of a dependency was uprevved in the build (e.g. 428 -- in an autoroller recipe). 429 -- 430 -- Cherry-picking a changelist on top of the base checkout is not considered 431 -- making the sources dirty as it is reported separately above. 432 -- 433 -- NULL is used to indicate false, TRUE indicates true. 434 HasDirtySources BOOL, 435 ) PRIMARY KEY(Project, TestId, PartitionTime DESC, VariantHash, IngestedInvocationId, RunIndex, ResultIndex) 436 , ROW DELETION POLICY (OLDER_THAN(PartitionTime, INTERVAL 90 DAY)); 437 438 -- Stores whether gerrit changes are user or authomation authored. 439 -- This is used as a cache to avoid excessive RPCs to gerrit. 440 -- 441 -- The cache is per-project to avoid confused deputy issues. 442 CREATE TABLE GerritChangelists ( 443 -- LUCI Project on behalf of which the data was fetched. 444 -- This is the project we authenticated to gerrit as when we fetched 445 -- the data. 446 Project STRING(40) NOT NULL, 447 -- The gerrit host. 448 Host STRING(255) NOT NULL, 449 -- The gerrit change number. 450 Change INT64 NOT NULL, 451 -- The changelist owner kind. Corresponds to 452 -- one of the luci.analysis.v1.ChangelistOwnerKinds values. 453 OwnerKind INT64 NOT NULL, 454 -- Used to enforce a deletion policy on this data. 455 CreationTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 456 ) PRIMARY KEY (Project, Host, Change), 457 ROW DELETION POLICY (OLDER_THAN(CreationTime, INTERVAL 100 DAY)); 458 459 -- Serves two purposes: 460 -- - Permits listing of distinct variants observed for a test in a project, 461 -- filtered by Realm. 462 -- 463 -- - Provides a mapping back from VariantHash to variant. 464 -- 465 -- TODO(crbug.com/1266759): 466 -- UniqueTestVariants table in ResultDB will be superseded by this table and 467 -- will need to be deleted. 468 CREATE TABLE TestVariantRealms ( 469 -- The LUCI Project in which the variant was observed. 470 Project STRING(40) NOT NULL, 471 472 -- Unique identifier of the test from which the variant was observed, 473 -- This has the same value as luci.resultdb.v1.TestResult.test_id. 474 TestId STRING(MAX) NOT NULL, 475 476 -- A hex-encoded sha256 of concatenated "<key>:<value>\n" variant pairs. 477 -- Computed as hex(sha256(<concatenated_key_value_pairs>)[:8]), 478 -- where concatenated_key_value_pairs is the result of concatenating 479 -- variant pairs formatted as "<key>:<value>\n" in ascending key order. 480 -- Combination of Realm, TestId and VariantHash can identify a test variant. 481 VariantHash STRING(16) NOT NULL, 482 483 -- The realm of the test result from which the variant was observed, excluding 484 -- project. 62 as ResultDB allows at most 64 characters for the construction 485 -- "<project>:<realm>" and project must be at least one character. 486 SubRealm STRING(62) NOT NULL, 487 488 -- key:value pairs to specify the way of running the test. 489 -- See also luci.resultdb.v1.TestResult.variant. 490 Variant ARRAY<STRING(MAX)>, 491 492 -- Other information about the test variant, like information from tags, 493 -- could be captured here. (e.g. for test ownership). 494 495 -- Last (ingestion) time this test variant was observed in the realm. 496 -- This value may be out of date by up to 24 hours to allow for contention- 497 -- reducing strategies. 498 LastIngestionTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 499 ) PRIMARY KEY(Project, TestId, VariantHash, SubRealm) 500 -- Use a slightly longer retention period to prevent the invocation being 501 -- dropped before the associated TestResults. 502 , ROW DELETION POLICY (OLDER_THAN(LastIngestionTime, INTERVAL 100 DAY)); 503 504 -- Permits listing of distinct tests observed for a project, filtered by Realm. 505 -- This table is created to support test ID substring search, which can often 506 -- lead to a full table scan, which will be significantly slower in the 507 -- TestVariantRealms table. 508 CREATE TABLE TestRealms ( 509 -- The LUCI Project in which the variant was observed. 510 Project STRING(40) NOT NULL, 511 512 -- Unique identifier of the test from which the variant was observed, 513 -- This has the same value as luci.resultdb.v1.TestResult.test_id. 514 TestId STRING(MAX) NOT NULL, 515 516 -- The realm of the test result from which the variant was observed, excluding 517 -- project. 62 as ResultDB allows at most 64 characters for the construction 518 -- "<project>:<realm>" and project must be at least one character. 519 SubRealm STRING(62) NOT NULL, 520 521 -- Last (ingestion) time this test variant was observed in the realm. 522 -- This value may be out of date by up to 24 hours to allow for contention- 523 -- reducing strategies. 524 LastIngestionTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 525 ) PRIMARY KEY(Project, TestId, SubRealm) 526 -- Use a slightly longer retention period to prevent the invocation being 527 -- dropped before the associated TestResults. 528 , ROW DELETION POLICY (OLDER_THAN(LastIngestionTime, INTERVAL 100 DAY)); 529 530 -- Uses in test variant analysis (see go/luci-test-variant-analysis-design). 531 -- Stores information about (test, variant, branch) combination. 532 -- The information stored is the verdict history, and the analyzed segment 533 -- results. 534 CREATE TABLE TestVariantBranch ( 535 -- The LUCI Project. 536 Project STRING(40) NOT NULL, 537 -- Unique identifier for the test. 538 TestId STRING(MAX) NOT NULL, 539 -- The identify of the test variant. 540 -- Computed as hex(sha256(<concatenated_key_value_pairs>)[:8]), 541 -- where concatenated_key_value_pairs is the result of concatenating 542 -- variant pairs formatted as "<key>:<value>\n" in ascending key order. 543 VariantHash STRING(16) NOT NULL, 544 -- The identity of the branch that was tested. 545 RefHash BYTES(8) NOT NULL, 546 -- key:value pairs in the test variant. See also Variant on the ResultDB 547 -- TestResults table. Only written the first time the row is created. 548 Variant ARRAY<STRING(MAX)>, 549 -- ZStandard-compressed, serialized luci.analysis.v1.SourceRef. 550 -- SourceRef represents a reference in a source control system. 551 -- Only written the first time the row is created. 552 SourceRef BYTES(MAX) NOT NULL, 553 -- ZStandard-compressed representation of up to 100 recent test verdicts for 554 -- the test variant. 555 HotInputBuffer BYTES(MAX) NOT NULL, 556 -- ZStandard-compressed representation of up to 2000 recent test verdicts for 557 -- the test variant, after those in HotInputBuffer. Verdicts in 558 -- HotInputBuffer are pushed here when HotInputBuffer is full. 559 ColdInputBuffer BYTES(MAX) NOT NULL, 560 -- ZStandard-compressed, serialized 561 -- luci.analysis.internal.changepoints.Segment proto describing the 562 -- finalizing segment (if any). When verdicts are evicted from the 563 -- ColdInputBuffer, their totals are added to this segment. 564 FinalizingSegment BYTES(MAX) NOT NULL, 565 -- ZStandard-compressed, serialized 566 -- luci.analysis.internal.changepoints.Segments proto describing the 567 -- finalized segments only. 568 -- We only store up to 100 finalized segments. 569 FinalizedSegments BYTES(MAX) NOT NULL, 570 -- ZStandard-compressed, serialized 571 -- luci.analysis.internal.changepoints.Statistics proto with summary 572 -- statistics about evicted verdicts. 573 Statistics BYTES(MAX) NOT NULL DEFAULT(b''), 574 -- The Spanner commit timestamp this row was last updated. 575 -- Used as version timestamp for BigQuery export. 576 LastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 577 ) PRIMARY KEY(Project, TestId, VariantHash, RefHash), 578 ROW DELETION POLICY (OLDER_THAN(LastUpdated, INTERVAL 90 DAY)); 579 580 -- This table is to ensure that the ingestion to the TestVariantBranch table 581 -- only happens once, even in the case of task retry. 582 -- This table will store the batches that have been processed by change point 583 -- analysis. 584 -- Insertions to this table will happen in the same transaction as 585 -- insertions to the TestVariantBranch table. 586 -- Before processing a batch of test variants in change point analysis, we will 587 -- first check if the batch exists in this table, if yes, then the batch has 588 -- been processed and we should skip it. 589 -- The retention is set to 31 days, since it is the maximum time a task can 590 -- live in task queue is 31 days (https://cloud.google.com/tasks/docs/quotas). 591 CREATE TABLE TestVariantBranchCheckpoint ( 592 -- The invocation id of the batch. 593 InvocationId STRING(MAX) NOT NULL, 594 -- The test ID of the first test variant in the batch. 595 StartingTestId STRING(MAX) NOT NULL, 596 -- The variant hash of the first test variant in the batch. 597 StartingVariantHash STRING(16) NOT NULL, 598 -- Time that this record was inserted in the table. 599 InsertionTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 600 ) PRIMARY KEY(InvocationId, StartingTestId, StartingVariantHash), 601 ROW DELETION POLICY (OLDER_THAN(InsertionTime, INTERVAL 31 DAY)); 602 603 -- This table is to determine if a run is a recycled run for test variant 604 -- analysis. 605 CREATE TABLE Invocations ( 606 -- The LUCI Project of the Ingested Invocation. Although Invocation IDs 607 -- are unique on their own, this enforces hard separation between the data 608 -- of each project. It ensures the same test results can be ingested into two 609 -- different LUCI Projects, but not the same project. 610 Project STRING(40) NOT NULL, 611 -- The invocation of the test run. 612 InvocationID STRING(MAX) NOT NULL, 613 -- The top-level invocation the test run will be ingested as part of. 614 -- If it is seen in another invocation ID, it will be considered a 615 -- duplicate test run. 616 IngestedInvocationID STRING(MAX) NOT NULL, 617 -- Used to enforce a deletion policy on this data. 618 CreationTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), 619 ) PRIMARY KEY(Project, InvocationID), 620 ROW DELETION POLICY (OLDER_THAN(CreationTime, INTERVAL 91 DAY));