go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/span/init_db.sql (about)

     1  -- Copyright 2022 The LUCI Authors.
     2  -- 
     3  -- Licensed under the Apache License, Version 2.0 (the "License");
     4  -- you may not use this file except in compliance with the License.
     5  -- You may obtain a copy of the License at
     6  -- 
     7  --      http://www.apache.org/licenses/LICENSE-2.0
     8  -- 
     9  -- Unless required by applicable law or agreed to in writing, software
    10  -- distributed under the License is distributed on an "AS IS" BASIS,
    11  -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  -- See the License for the specific language governing permissions and
    13  -- limitations under the License.
    14  
    15  --------------------------------------------------------------------------------
    16  -- This script initializes a LUCI Analysis Spanner database.
    17  
    18  -- FailureAssociationRules associate failures with bugs. When a rule
    19  -- is used to match incoming test failures, the resultant cluster is
    20  -- known as a 'bug cluster' because the failures in it are associated
    21  -- with a bug (via the failure association rule).
    22  -- The ID of a bug cluster corresponding to a rule is
    23  -- (Project, RuleBasedClusteringAlgorithm, RuleID), where
    24  -- RuleBasedClusteringAlgorithm is the algorithm name of the algorithm
    25  -- that clusters failures based on failure association rules (e.g.
    26  -- 'rules-v2'), and (Project, RuleId) is the ID of the rule.
    27  CREATE TABLE FailureAssociationRules (
    28    -- Identity fields.
    29  
    30    -- The LUCI Project this bug belongs to.
    31    Project STRING(40) NOT NULL,
    32    -- The unique identifier for the rule. This is a randomly generated
    33    -- 128-bit ID, encoded as 32 lowercase hexadecimal characters.
    34    RuleId STRING(32) NOT NULL,
    35  
    36    -- Failure predicate field (which failures are matched by the rule).
    37  
    38    -- The rule predicate, defining which failures are being associated.
    39    RuleDefinition STRING(65536) NOT NULL,
    40    -- Whether the bug must still be updated by LUCI Analysis, and whether
    41    -- failures should still be matched against this rule. The only allowed
    42    -- values are true or NULL (to indicate false). Only if the bug has
    43    -- been closed and no failures have been observed for a while should
    44    -- this be NULL. This makes it easy to retrofit a NULL_FILTERED index
    45    -- in future, if it is needed for performance.
    46    IsActive BOOL,
    47    -- The time the rule was last updated in a way that caused the
    48    -- matched failures to change, i.e. because of a change to RuleDefinition
    49    -- or IsActive. (For comparison, updating BugID does NOT change
    50    -- the matched failures, so does NOT update this field.)
    51    -- When this value changes, it triggers re-clustering.
    52    -- Basis for RulesVersion on ClusteringState and ReclusteringRuns.
    53    PredicateLastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
    54  
    55    -- Bug fields.
    56  
    57    -- The bug the failures are associated with (part 1). This is the
    58    -- bug tracking system containing the bug the failures are associated
    59    -- with. The only supported values are 'monorail' and 'buganizer'.
    60    BugSystem STRING(16) NOT NULL,
    61    -- The bug the failures are associated with (part 2). This is the
    62    -- identifier of the bug the failures are associated with, as identified
    63    -- by the bug tracking system itself. For monorail, the scheme is
    64    -- {project}/{numeric_id}, for buganizer, the scheme is {numeric_id}.
    65    BugId STRING(255) NOT NULL,
    66    -- Whether this rule should manage the priority and verified status
    67    -- of the associated bug based on the impact of the cluster defined
    68    -- by this rule.
    69    -- The only allowed values are true or NULL (to indicate false).
    70    IsManagingBug BOOL,
    71    -- Indicates whether we should update the priority of the bug
    72    -- that is associated with this rule based on the cluster's impact.
    73    -- This toggle has must have no effect if the IsManagingBug toggle is off.
    74    -- This value should not be null.
    75    -- This column has a default value of TRUE in prod, but it is commented
    76    -- out here because spanner emulator doesn't support DEFAULT
    77    -- expressions, see https://github.com/GoogleCloudPlatform/cloud-spanner-emulator/issues/71.
    78    IsManagingBugPriority BOOL NOT NULL, -- DEFAULT TRUE,
    79    -- Tracks when the field IsManagingBugPriority was last updated.
    80    IsManagingBugPriorityLastUpdated TIMESTAMP OPTIONS (allow_commit_timestamp=true),
    81  
    82    -- Immutable data.
    83  
    84    -- The suggested cluster this failure association rule was created from
    85    -- (if any) (part 1).
    86    -- This is the algorithm component of the suggested cluster this rule
    87    -- was created from.
    88    -- Until re-clustering is complete (and the residual impact of the source
    89    -- cluster has reduced to zero), SourceClusterAlgorithm and SourceClusterId
    90    -- tell bug filing to ignore the source suggested cluster when
    91    -- determining whether new bugs need to be filed.
    92    SourceClusterAlgorithm STRING(32) NOT NULL,
    93    -- The suggested cluster this failure association rule was created from
    94    -- (if any) (part 2).
    95    -- This is the algorithm-specific ID component of the suggested cluster
    96    -- this rule was created from.
    97    SourceClusterId STRING(32) NOT NULL,
    98  
    99    -- System-controlled data (not user modifiable).
   100  
   101    -- Serialized and compressed luci.analysis.internal.bugs.BugManagementState
   102    -- proto. State used to control automatic bug management.
   103    BugManagementState BYTES(MAX),
   104  
   105    -- Audit fields.
   106  
   107    -- The time the rule was created.
   108    CreationTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   109    -- The user which created the rule. If this was auto-filed by LUCI Analysis
   110    -- itself, this is the special value 'system'. Otherwise, it is
   111    -- an email address.
   112    -- 320 is the maximum length of an email address (64 for local part,
   113    -- 1 for the '@', and 255 for the domain part).
   114    -- A cron job automatically deletes this data after 30 days.
   115    CreationUser STRING(320) NOT NULL,
   116    -- The last time an auditable field was updated. An auditable field
   117    -- is any field other than a system controlled data field.
   118    LastAuditableUpdate TIMESTAMP OPTIONS (allow_commit_timestamp=true),
   119    -- The last user who updated an auditable field. An auditable field
   120    -- is any field other than a system controlled data field.
   121    -- If this was LUCI Analysis itself,
   122    -- (e.g. in case of an auto-filed bug which was created and never
   123    -- modified) this is 'system'. Otherwise, it is an email address.
   124    -- A cron job automatically deletes this data after 30 days.
   125    LastAuditableUpdateUser STRING(320),
   126    -- The last time the rule was updated.
   127    LastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   128  ) PRIMARY KEY (Project, RuleId);
   129  
   130  -- The failure association rules associated with a bug. This also
   131  -- enforces the constraint that there is at most one rule per bug
   132  -- per project.
   133  CREATE UNIQUE INDEX FailureAssociationRuleByBugAndProject ON FailureAssociationRules(BugSystem, BugId, Project);
   134  
   135  -- Enforces the constraint that only one rule may manage a given bug
   136  -- at once.
   137  -- This is required to ensure that automatic bug filing does not attempt to
   138  -- take conflicting actions (i.e. simultaneously increase and decrease
   139  -- priority) on the same bug, because of differing priorities set by
   140  -- different rules.
   141  CREATE UNIQUE NULL_FILTERED INDEX FailureAssociationRuleByManagedBug ON FailureAssociationRules(BugSystem, BugId, IsManagingBug);
   142  
   143  -- Clustering state records the clustering state of failed test results, organised
   144  -- by chunk.
   145  CREATE TABLE ClusteringState (
   146    -- The LUCI Project the test results belong to.
   147    Project STRING(40) NOT NULL,
   148    -- The identity of the chunk of test results. 32 lowercase hexadecimal
   149    -- characters assigned by the ingestion process.
   150    ChunkId STRING(32) NOT NULL,
   151    -- The start of the retention period of the test results in the chunk.
   152    PartitionTime TIMESTAMP NOT NULL,
   153    -- The identity of the blob storing the chunk's test results.
   154    ObjectId STRING(32) NOT NULL,
   155    -- The version of clustering algorithms used to cluster test results in this
   156    -- chunk. (This is a version over the set of algorithms, distinct from the
   157    -- version of a single algorithm, e.g.:
   158    -- v1 -> {reason-v1}, v2 -> {reason-v1, testname-v1},
   159    -- v3 -> {reason-v2, testname-v1}.)
   160    AlgorithmsVersion INT64 NOT NULL,
   161    -- The version of project configuration used by algorithms to match test
   162    -- results in this chunk.
   163    ConfigVersion TIMESTAMP NOT NULL,
   164    -- The version of the set of failure association rules used to match test
   165    -- results in this chunk. This is the maximum "Predicate Last Updated" time
   166    -- of any failure association rule in the snapshot of failure association
   167    -- rules used to match the test results.
   168    RulesVersion TIMESTAMP NOT NULL,
   169    -- Serialized ChunkClusters proto containing which test result is in which
   170    -- cluster.
   171    Clusters BYTES(MAX) NOT NULL,
   172    -- The Spanner commit timestamp of when the row was last updated.
   173    LastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   174  ) PRIMARY KEY (Project, ChunkId)
   175  , ROW DELETION POLICY (OLDER_THAN(PartitionTime, INTERVAL 90 DAY));
   176  
   177  -- ReclusteringRuns contains details of runs used to re-cluster test results.
   178  CREATE TABLE ReclusteringRuns (
   179    -- The LUCI Project.
   180    Project STRING(40) NOT NULL,
   181    -- The attempt. This is the timestamp the orchestrator run ends.
   182    AttemptTimestamp TIMESTAMP NOT NULL,
   183    -- The minimum algorithms version the reclustering run is trying to achieve.
   184    -- Chunks with an AlgorithmsVersion less than this value are eligible to be
   185    -- re-clustered.
   186    AlgorithmsVersion INT64 NOT NULL,
   187    -- The minimum config version the reclustering run is trying to achieve.
   188    -- Chunks with a ConfigVersion less than this value are eligible to be
   189    -- re-clustered.
   190    ConfigVersion TIMESTAMP NOT NULL,
   191    -- The minimum rules version the reclustering run is trying to achieve.
   192    -- Chunks with a RulesVersion less than this value are eligible to be
   193    -- re-clustered.
   194    RulesVersion TIMESTAMP NOT NULL,
   195    -- The number of shards created for this run (for this LUCI project).
   196    ShardCount INT64 NOT NULL,
   197    -- The number of shards that have reported progress (at least once).
   198    -- When this is equal to ShardCount, readers can have confidence Progress
   199    -- is a reasonable reflection of the progress made reclustering
   200    -- this project. Until then, it is a loose lower-bound.
   201    ShardsReported INT64 NOT NULL,
   202    -- The progress. This is a value between 0 and 1000*ShardCount.
   203    Progress INT64 NOT NULL,
   204  ) PRIMARY KEY (Project, AttemptTimestamp DESC)
   205  , ROW DELETION POLICY (OLDER_THAN(AttemptTimestamp, INTERVAL 90 DAY));
   206  
   207  -- ReclusteringShards is used to for shards to report progress re-clustering
   208  -- test results. Generally only progress for the most recent minute will
   209  -- be in this table.
   210  CREATE TABLE ReclusteringShards (
   211    -- A unique number for the shard. Shards are numbered sequentially,
   212    -- starting from one.
   213    ShardNumber INT64 NOT NULL,
   214    -- The attempt. This is the timestamp the orchestrator run ends.
   215    AttemptTimestamp TIMESTAMP NOT NULL,
   216    -- The LUCI Project the shard is doing reclustering for.
   217    Project STRING(40) NOT NULL,
   218    -- The progress. This is a value between 0 and 1000. If this is NULL,
   219    -- it means progress has not yet been reported by the shard.
   220    Progress INT64,
   221  ) PRIMARY KEY (ShardNumber, AttemptTimestamp DESC);
   222  
   223  -- Ingestions is used to synchronise and deduplicate the ingestion
   224  -- of test results which require data from one or more sources.
   225  --
   226  -- Ingestion may only start after the following events are received:
   227  -- 1. The build has completed.
   228  -- 2. The invocation containing its test results (if any)
   229  --    has been finalized.
   230  -- 3. The presubmit run (if any) has completed.
   231  -- These events may occur in any order (e.g. 3 can occur before 1 if the
   232  -- presubmit run fails before all builds are complete).
   233  CREATE TABLE Ingestions (
   234    -- The unique key for the ingestion. The current scheme is:
   235    -- {buildbucket host name}/{build id}.
   236    BuildId STRING(1024) NOT NULL,
   237    -- The LUCI Project to which the build belongs. Populated at the same
   238    -- time as the build result.
   239    BuildProject STRING(40),
   240    -- The build result.
   241    BuildResult BYTES(MAX),
   242    -- The Spanner commit time the build result was populated.
   243    BuildJoinedTime TIMESTAMP OPTIONS (allow_commit_timestamp=true),
   244    -- Does the build have a ResultDB invocation? If yes, then ingestion should
   245    -- wait for the invocation result to be populated before commencing ingestion.
   246    -- (In practice, ingestion of a build without an invocation does nothing, but
   247    -- we schedule an ingestion for it anyway as for monitoring purposes it is
   248    -- convenient if all builds yield an ingestion task.)
   249    -- Only populated once either the BuildResult or InvocationResult has been set.
   250    HasInvocation BOOL,
   251    -- The LUCI Project to which the invocation belongs. Populated at the same
   252    -- time as the invocation result.
   253    InvocationProject STRING(40),
   254    -- The invocation result.
   255    InvocationResult BYTES(MAX),
   256    -- The Spanner commit time the invocation result was populated.
   257    InvocationJoinedTime TIMESTAMP OPTIONS (allow_commit_timestamp=true),
   258    -- Is the build part of a presubmit run? If yes, then ingestion should
   259    -- wait for the presubmit result to be populated before commencing ingestion.
   260    -- Use 'true' to indicate true and NULL to indicate false.
   261    -- Only populated once either the BuildResult or PresubmitResult has been set.
   262    IsPresubmit BOOL,
   263    -- The LUCI Project to which the presubmit run belongs. Populated at the
   264    -- same time as the presubmit run result.
   265    PresubmitProject STRING(40),
   266    -- The presubmit result.
   267    PresubmitResult BYTES(MAX),
   268    -- The Spanner commit time the presubmit result was populated.
   269    PresubmitJoinedTime TIMESTAMP OPTIONS (allow_commit_timestamp=true),
   270    -- The Spanner commit time the row last last updated.
   271    LastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   272    -- The number of test result ingestion tasks have been created for this
   273    -- invocation.
   274    -- Used to avoid duplicate scheduling of ingestion tasks. If the page_index
   275    -- is the index of the page being processed, an ingestion task for the next
   276    -- page will only be created if (page_index + 1) == TaskCount.
   277    TaskCount INT64,
   278  ) PRIMARY KEY (BuildId)
   279  -- 90 days retention, plus some margin (10 days) to ensure ingestion records
   280  -- are always retained longer than the ingested results (acknowledging
   281  -- the partition time on ingested chunks may be later than the LastUpdated
   282  -- time if clocks are not synchronised).
   283  , ROW DELETION POLICY (OLDER_THAN(LastUpdated, INTERVAL 100 DAY));
   284  
   285  -- Stores transactional tasks reminders.
   286  -- See https://go.chromium.org/luci/server/tq. Scanned by tq-sweeper-spanner.
   287  CREATE TABLE TQReminders (
   288      ID STRING(MAX) NOT NULL,
   289      FreshUntil TIMESTAMP NOT NULL,
   290      Payload BYTES(102400) NOT NULL,
   291  ) PRIMARY KEY (ID ASC);
   292  
   293  CREATE TABLE TQLeases (
   294      SectionID STRING(MAX) NOT NULL,
   295      LeaseID INT64 NOT NULL,
   296      SerializedParts ARRAY<STRING(MAX)>,
   297      ExpiresAt TIMESTAMP NOT NULL,
   298  ) PRIMARY KEY (SectionID ASC, LeaseID ASC);
   299  
   300  -- Stores test results.
   301  -- As of Q2 2022, this table is estimated to collect ~250 billion rows over
   302  -- 90 days. Please be mindful of storage implications when adding new fields.
   303  -- https://cloud.google.com/spanner/docs/reference/standard-sql/data-types#storage_size_for_data_types
   304  -- gives guidance on the storage sizes of data types.
   305  CREATE TABLE TestResults (
   306    -- The LUCI Project this test result belongs to.
   307    Project STRING(40) NOT NULL,
   308  
   309    -- Unique identifier of the test.
   310    -- This has the same value as luci.resultdb.v1.TestResult.test_id.
   311    TestId STRING(MAX) NOT NULL,
   312  
   313    -- Partition time, as determined by LUCI Analysis ingestion.
   314    -- Start time of the ingested build (for postsubmit results) or start time
   315    -- of the presubmit run (for presubmit results). Defines date/time axis of
   316    -- test results plotted by date/time, and start of data retention period.
   317    -- Including as part of Primary Key allows direct filtering of data for test
   318    -- to last N days. This could be used to improve performance for tests with
   319    -- many results, or allow experimentation with keeping longer histories
   320    -- (e.g. 120 days) without incurring performance penalty on time-windowed
   321    -- queries.
   322    PartitionTime TIMESTAMP NOT NULL,
   323  
   324    -- A hex-encoded sha256 of concatenated "<key>:<value>\n" variant pairs.
   325    -- Computed as hex(sha256(<concatenated_key_value_pairs>)[:8]),
   326    -- where concatenated_key_value_pairs is the result of concatenating
   327    -- variant pairs formatted as "<key>:<value>\n" in ascending key order.
   328    -- Combination of Realm, TestId and VariantHash can identify a test variant.
   329    VariantHash STRING(16) NOT NULL,
   330  
   331    -- The invocation from which these test results were ingested.
   332    -- This is the top-level invocation that was ingested.
   333    IngestedInvocationId STRING(MAX) NOT NULL,
   334  
   335    -- The index of the test run that contained this test result.
   336    -- The test run of a test result is the invocation it is directly
   337    -- included inside; typically the invocation for the swarming task
   338    -- the tests ran as part of.
   339    -- Indexes are assigned to runs based on the order they appear to have
   340    -- run in, starting from zero (based on test result timestamps).
   341    -- However, if two test runs overlap, the order of indexes for those test
   342    -- runs is not guaranteed.
   343    RunIndex INT64 NOT NULL,
   344  
   345    -- The index of the test result in the run. The first test result that
   346    -- was produced in a run will have index 0, the second will have index 1,
   347    -- and so on.
   348    ResultIndex INT64 NOT NULL,
   349  
   350    -- Whether the test result was expected.
   351    -- The value 'true' is used to encode true, and NULL encodes false.
   352    IsUnexpected BOOL,
   353  
   354    -- How long the test execution took, in microseconds.
   355    RunDurationUsec INT64,
   356  
   357    -- The test result status.
   358    Status INT64 NOT NULL,
   359  
   360    -- The reasons (if any) the test verdict was exonerated.
   361    -- If this array is null, the test verdict was not exonerated.
   362    -- (Non-null) empty array values are not used.
   363    -- This field is stored denormalised. It is guaranteed to be the same for
   364    -- all results for a test variant in an ingested invocation.
   365    ExonerationReasons ARRAY<INT64>,
   366  
   367    -- The following data from the invocation that is stored denormalised.
   368    -- It is guaranteed to be the same for all results for the ingested invocation.
   369  
   370    -- The realm of the test result, excluding project. 62 as ResultDB allows
   371    -- at most 64 characters for the construction "<project>:<realm>" and project
   372    -- must be at least one character.
   373    SubRealm STRING(62) NOT NULL,
   374  
   375    -- Indicate if this test result is from LUCI Bisection run.
   376    -- Test results from LUCI Bisection will have an option to be filtered out
   377    -- from test history endpoints so that they do not show in the test history page.
   378    -- The value 'true' is used to encode true, and NULL encodes false.
   379    IsFromBisection BOOL,
   380  
   381    -- The following data from the test verdict stored denormalised. It is guaranteed
   382    -- to be the same for all results for the same test verdict.
   383  
   384    -- The identity of the source reference (e.g. git reference / branch) that was tested.
   385    -- See go.chromium.org/luci/analysis/pbutil.SourceRefHash for details.
   386    SourceRefHash BYTES(8),
   387  
   388    -- The position along the given source reference that was tested.
   389    -- This excludes any unsubmitted changes that were tested, which are
   390    -- noted separately in the Changelist... fields below.
   391    SourcePosition INT64,
   392  
   393    -- The following fields capture information about any unsubmitted
   394    -- changelists that were tested by the test execution. The arrays
   395    -- are matched in length and correspond in index, i.e.
   396    -- ChangelistHosts[OFFSET(0)] corresponds with ChangelistChanges[OFFSET(0)]
   397    -- and ChangelistPatchsets[OFFSET(0)],
   398    -- with the exception of ChangelistOwnerKinds, for which correspondance
   399    -- is not guaranteed until March 2023 (as the column was retrofitted later).
   400    --
   401    -- Changelists are stored in ascending lexicographical order (over
   402    -- (hostname, change, patchset)).
   403    -- They will be set for all presubmit runs, and may be set for other
   404    -- builds as well (even those outside a formal LUCI CV run) based on
   405    -- buildbucket inputs. At most 10 changelists are included.
   406  
   407    -- Hostname(s) of the gerrit instance of the changelist that was tested
   408    -- (if any). For storage efficiency, the suffix "-review.googlesource.com"
   409    -- is not stored if it is present. When reading, if the value read
   410    -- does not contain dots ('.'), the suffix should be added back.
   411    -- Otherwise, the value can be assumed to be complete.
   412    ChangelistHosts ARRAY<STRING(255)> NOT NULL,
   413  
   414    -- The changelist number(s), e.g. 12345.
   415    ChangelistChanges ARRAY<INT64> NOT NULL,
   416  
   417    -- The patchset number(s) of the changelist, e.g. 1.
   418    ChangelistPatchsets ARRAY<INT64> NOT NULL,
   419  
   420    -- The changelist owner kind(s). Elements in this array correspond to
   421    -- one of the luci.analysis.v1.ChangelistOwnerKinds values.
   422    -- 'U' corresponds to a User changelist, 'A' corresponds to an Automation
   423    -- changelist, and '' corresponds to a changelist of unspecified origin.
   424    ChangelistOwnerKinds ARRAY<STRING(1)>,
   425  
   426    -- Whether there were any changes made to the sources, not described above.
   427    -- For example, a version of a dependency was uprevved in the build (e.g.
   428    -- in an autoroller recipe).
   429    --
   430    -- Cherry-picking a changelist on top of the base checkout is not considered
   431    -- making the sources dirty as it is reported separately above.
   432    --
   433    -- NULL is used to indicate false, TRUE indicates true.
   434    HasDirtySources BOOL,
   435  ) PRIMARY KEY(Project, TestId, PartitionTime DESC, VariantHash, IngestedInvocationId, RunIndex, ResultIndex)
   436  , ROW DELETION POLICY (OLDER_THAN(PartitionTime, INTERVAL 90 DAY));
   437  
   438  -- Stores whether gerrit changes are user or authomation authored.
   439  -- This is used as a cache to avoid excessive RPCs to gerrit.
   440  --
   441  -- The cache is per-project to avoid confused deputy issues.
   442  CREATE TABLE GerritChangelists (
   443    -- LUCI Project on behalf of which the data was fetched.
   444    -- This is the project we authenticated to gerrit as when we fetched
   445    -- the data.
   446    Project STRING(40) NOT NULL,
   447    -- The gerrit host.
   448    Host STRING(255) NOT NULL,
   449    -- The gerrit change number.
   450    Change INT64 NOT NULL,
   451    -- The changelist owner kind. Corresponds to
   452    -- one of the luci.analysis.v1.ChangelistOwnerKinds values.
   453    OwnerKind INT64 NOT NULL,
   454    -- Used to enforce a deletion policy on this data.
   455    CreationTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   456  ) PRIMARY KEY (Project, Host, Change),
   457    ROW DELETION POLICY (OLDER_THAN(CreationTime, INTERVAL 100 DAY));
   458  
   459  -- Serves two purposes:
   460  -- - Permits listing of distinct variants observed for a test in a project,
   461  --   filtered by Realm.
   462  --
   463  -- - Provides a mapping back from VariantHash to variant.
   464  --
   465  -- TODO(crbug.com/1266759):
   466  -- UniqueTestVariants table in ResultDB will be superseded by this table and
   467  -- will need to be deleted.
   468  CREATE TABLE TestVariantRealms (
   469    -- The LUCI Project in which the variant was observed.
   470    Project STRING(40) NOT NULL,
   471  
   472    -- Unique identifier of the test from which the variant was observed,
   473    -- This has the same value as luci.resultdb.v1.TestResult.test_id.
   474    TestId STRING(MAX) NOT NULL,
   475  
   476    -- A hex-encoded sha256 of concatenated "<key>:<value>\n" variant pairs.
   477    -- Computed as hex(sha256(<concatenated_key_value_pairs>)[:8]),
   478    -- where concatenated_key_value_pairs is the result of concatenating
   479    -- variant pairs formatted as "<key>:<value>\n" in ascending key order.
   480    -- Combination of Realm, TestId and VariantHash can identify a test variant.
   481    VariantHash STRING(16) NOT NULL,
   482  
   483    -- The realm of the test result from which the variant was observed, excluding
   484    -- project. 62 as ResultDB allows at most 64 characters for the construction
   485    -- "<project>:<realm>" and project must be at least one character.
   486    SubRealm STRING(62) NOT NULL,
   487  
   488    -- key:value pairs to specify the way of running the test.
   489    -- See also luci.resultdb.v1.TestResult.variant.
   490    Variant ARRAY<STRING(MAX)>,
   491  
   492    -- Other information about the test variant, like information from tags,
   493    -- could be captured here. (e.g. for test ownership).
   494  
   495    -- Last (ingestion) time this test variant was observed in the realm.
   496    -- This value may be out of date by up to 24 hours to allow for contention-
   497    -- reducing strategies.
   498    LastIngestionTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   499  ) PRIMARY KEY(Project, TestId, VariantHash, SubRealm)
   500  -- Use a slightly longer retention period to prevent the invocation being
   501  -- dropped before the associated TestResults.
   502  , ROW DELETION POLICY (OLDER_THAN(LastIngestionTime, INTERVAL 100 DAY));
   503  
   504  -- Permits listing of distinct tests observed for a project, filtered by Realm.
   505  -- This table is created to support test ID substring search, which can often
   506  -- lead to a full table scan, which will be significantly slower in the
   507  -- TestVariantRealms table.
   508  CREATE TABLE TestRealms (
   509    -- The LUCI Project in which the variant was observed.
   510    Project STRING(40) NOT NULL,
   511  
   512    -- Unique identifier of the test from which the variant was observed,
   513    -- This has the same value as luci.resultdb.v1.TestResult.test_id.
   514    TestId STRING(MAX) NOT NULL,
   515  
   516    -- The realm of the test result from which the variant was observed, excluding
   517    -- project. 62 as ResultDB allows at most 64 characters for the construction
   518    -- "<project>:<realm>" and project must be at least one character.
   519    SubRealm STRING(62) NOT NULL,
   520  
   521    -- Last (ingestion) time this test variant was observed in the realm.
   522    -- This value may be out of date by up to 24 hours to allow for contention-
   523    -- reducing strategies.
   524    LastIngestionTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   525  ) PRIMARY KEY(Project, TestId, SubRealm)
   526  -- Use a slightly longer retention period to prevent the invocation being
   527  -- dropped before the associated TestResults.
   528  , ROW DELETION POLICY (OLDER_THAN(LastIngestionTime, INTERVAL 100 DAY));
   529  
   530  -- Uses in test variant analysis (see go/luci-test-variant-analysis-design).
   531  -- Stores information about (test, variant, branch) combination.
   532  -- The information stored is the verdict history, and the analyzed segment
   533  -- results.
   534  CREATE TABLE TestVariantBranch (
   535    -- The LUCI Project.
   536    Project STRING(40) NOT NULL,
   537    -- Unique identifier for the test.
   538    TestId STRING(MAX) NOT NULL,
   539    -- The identify of the test variant.
   540    -- Computed as hex(sha256(<concatenated_key_value_pairs>)[:8]),
   541    -- where concatenated_key_value_pairs is the result of concatenating
   542    -- variant pairs formatted as "<key>:<value>\n" in ascending key order.
   543    VariantHash STRING(16) NOT NULL,
   544    -- The identity of the branch that was tested.
   545    RefHash BYTES(8) NOT NULL,
   546    -- key:value pairs in the test variant. See also Variant on the ResultDB
   547    -- TestResults table. Only written the first time the row is created.
   548    Variant ARRAY<STRING(MAX)>,
   549    -- ZStandard-compressed, serialized luci.analysis.v1.SourceRef.
   550    -- SourceRef represents a reference in a source control system.
   551    -- Only written the first time the row is created.
   552    SourceRef BYTES(MAX) NOT NULL,
   553    -- ZStandard-compressed representation of up to 100 recent test verdicts for
   554    --  the test variant.
   555    HotInputBuffer BYTES(MAX) NOT NULL,
   556    -- ZStandard-compressed representation of up to 2000 recent test verdicts for
   557    --  the test variant, after those in HotInputBuffer. Verdicts in
   558    -- HotInputBuffer are pushed here when HotInputBuffer is full.
   559    ColdInputBuffer BYTES(MAX) NOT NULL,
   560    -- ZStandard-compressed, serialized
   561    -- luci.analysis.internal.changepoints.Segment proto describing the
   562    -- finalizing segment (if any). When verdicts are evicted from the
   563    -- ColdInputBuffer, their totals are added to this segment.
   564    FinalizingSegment BYTES(MAX) NOT NULL,
   565    -- ZStandard-compressed, serialized
   566    -- luci.analysis.internal.changepoints.Segments proto describing the
   567    -- finalized segments only.
   568    -- We only store up to 100 finalized segments.
   569    FinalizedSegments BYTES(MAX) NOT NULL,
   570    -- ZStandard-compressed, serialized
   571    -- luci.analysis.internal.changepoints.Statistics proto with summary
   572    -- statistics about evicted verdicts.
   573    Statistics BYTES(MAX) NOT NULL DEFAULT(b''),
   574    -- The Spanner commit timestamp this row was last updated.
   575    -- Used as version timestamp for BigQuery export.
   576    LastUpdated TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   577  ) PRIMARY KEY(Project, TestId, VariantHash, RefHash),
   578    ROW DELETION POLICY (OLDER_THAN(LastUpdated, INTERVAL 90 DAY));
   579  
   580  -- This table is to ensure that the ingestion to the TestVariantBranch table
   581  -- only happens once, even in the case of task retry.
   582  -- This table will store the batches that have been processed by change point
   583  -- analysis.
   584  -- Insertions to this table will happen in the same transaction as
   585  -- insertions to the TestVariantBranch table.
   586  -- Before processing a batch of test variants in change point analysis, we will
   587  -- first check if the batch exists in this table, if yes, then the batch has
   588  -- been processed and we should skip it.
   589  -- The retention is set to 31 days, since it is the maximum time a task can
   590  -- live in task queue is 31 days (https://cloud.google.com/tasks/docs/quotas).
   591  CREATE TABLE TestVariantBranchCheckpoint (
   592    -- The invocation id of the batch.
   593    InvocationId STRING(MAX) NOT NULL,
   594    -- The test ID of the first test variant in the batch.
   595    StartingTestId STRING(MAX) NOT NULL,
   596    -- The variant hash of the first test variant in the batch.
   597    StartingVariantHash STRING(16) NOT NULL,
   598    -- Time that this record was inserted in the table.
   599    InsertionTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   600  ) PRIMARY KEY(InvocationId, StartingTestId, StartingVariantHash),
   601    ROW DELETION POLICY (OLDER_THAN(InsertionTime, INTERVAL 31 DAY));
   602  
   603  -- This table is to determine if a run is a recycled run for test variant
   604  -- analysis.
   605  CREATE TABLE Invocations (
   606    -- The LUCI Project of the Ingested Invocation. Although Invocation IDs
   607    -- are unique on their own, this enforces hard separation between the data
   608    -- of each project. It ensures the same test results can be ingested into two
   609    -- different LUCI Projects, but not the same project.
   610    Project STRING(40) NOT NULL,
   611    -- The invocation of the test run.
   612    InvocationID STRING(MAX) NOT NULL,
   613    -- The top-level invocation the test run will be ingested as part of.
   614    -- If it is seen in another invocation ID, it will be considered a
   615    -- duplicate test run.
   616    IngestedInvocationID STRING(MAX) NOT NULL,
   617    -- Used to enforce a deletion policy on this data.
   618    CreationTime TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true),
   619  ) PRIMARY KEY(Project, InvocationID),
   620    ROW DELETION POLICY (OLDER_THAN(CreationTime, INTERVAL 91 DAY));