go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/views/ensure_views_cron.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package views contains methods to interact with BigQuery views.
    16  package views
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"strings"
    22  	"time"
    23  
    24  	"cloud.google.com/go/bigquery"
    25  	"google.golang.org/api/iterator"
    26  
    27  	"go.chromium.org/luci/common/bq"
    28  	"go.chromium.org/luci/common/errors"
    29  	"go.chromium.org/luci/common/logging"
    30  
    31  	"go.chromium.org/luci/analysis/internal/bqutil"
    32  	"go.chromium.org/luci/analysis/pbutil"
    33  )
    34  
    35  const rulesViewBaseQuery = `
    36  	WITH items AS (
    37  		SELECT
    38  		project,
    39  		rule_id,
    40  		ARRAY_AGG(rh1 ORDER BY rh1.last_update_time DESC LIMIT 1)[OFFSET(0)] as row
    41  		FROM internal.failure_association_rules_history rh1
    42  		GROUP BY rh1.project, rh1.rule_id
    43  	)
    44  	SELECT
    45  		project,
    46  		rule_id,
    47  		row.* EXCEPT(project, rule_id)
    48  	FROM items`
    49  
    50  const segmentsUnexpectedRealtimeQuery = `
    51  	WITH merged_table AS(
    52  		SELECT *
    53  		FROM internal.test_variant_segment_updates
    54  		WHERE has_recent_unexpected_results = 1
    55  		UNION ALL
    56  		SELECT *
    57  		FROM internal.test_variant_segments
    58  		WHERE has_recent_unexpected_results = 1
    59  	), merged_table_grouped AS(
    60  		SELECT
    61  			project, test_id, variant_hash, ref_hash,
    62  			ARRAY_AGG(m ORDER BY version DESC LIMIT 1)[OFFSET(0)] as row
    63  		FROM merged_table m
    64  		GROUP BY project, test_id, variant_hash, ref_hash
    65  	)
    66  	SELECT
    67  		project, test_id, variant_hash, ref_hash,
    68  		row.variant AS variant,
    69  		row.ref AS ref,
    70  		-- Omit has_recent_unexpected_results here as all rows have unexpected results.
    71  		row.segments AS segments,
    72  		row.version AS version
    73  	FROM merged_table_grouped`
    74  
    75  // TODO(beining@): update this to query from the internal.test_variant_segments_unexpected_realtime after its been created.
    76  const segmentsUnexpectedRealtimePerProjectQuery = `
    77  	WITH merged_table AS(
    78  		SELECT *
    79  		FROM internal.test_variant_segment_updates
    80  		WHERE project = "%[1]s" AND has_recent_unexpected_results = 1
    81  		UNION ALL
    82  		SELECT *
    83  		FROM internal.test_variant_segments
    84  		WHERE project = "%[1]s" AND has_recent_unexpected_results = 1
    85  	), merged_table_grouped AS(
    86  		SELECT
    87  			project, test_id, variant_hash, ref_hash,
    88  			ARRAY_AGG(m ORDER BY version DESC LIMIT 1)[OFFSET(0)] as row
    89  		FROM merged_table m
    90  		GROUP BY project, test_id, variant_hash, ref_hash
    91  	)
    92  	SELECT
    93  		project, test_id, variant_hash, ref_hash,
    94  		row.variant AS variant,
    95  		row.ref AS ref,
    96  		-- Omit has_recent_unexpected_results here as all rows have unexpected results.
    97  		row.segments AS segments,
    98  		row.version AS version
    99  	FROM merged_table_grouped`
   100  
   101  var datasetViewQueries = map[string]map[string]*bigquery.TableMetadata{
   102  	"internal": {
   103  		"failure_association_rules": &bigquery.TableMetadata{
   104  			ViewQuery: rulesViewBaseQuery,
   105  			Labels:    map[string]string{bq.MetadataVersionKey: "2"},
   106  		},
   107  		"test_variant_segments_unexpected_realtime": &bigquery.TableMetadata{
   108  			Description: "Contains test variant histories segmented by change point analysis, limited to test variants with unexpected" +
   109  				" results in postsubmit in the last 90 days. See go/luci-test-variant-analysis-design.",
   110  			ViewQuery: segmentsUnexpectedRealtimeQuery,
   111  			Labels:    map[string]string{bq.MetadataVersionKey: "1"},
   112  		}},
   113  }
   114  
   115  type makeTableMetadata func(luciProject string) *bigquery.TableMetadata
   116  
   117  var luciProjectViewQueries = map[string]makeTableMetadata{
   118  	"failure_association_rules": func(luciProject string) *bigquery.TableMetadata {
   119  		// Revalidate project as safeguard against SQL-Injection.
   120  		if err := pbutil.ValidateProject(luciProject); err != nil {
   121  			panic(err)
   122  		}
   123  
   124  		return &bigquery.TableMetadata{
   125  			Description: "Failure association rules for " + luciProject + ". See go/luci-analysis-concepts#failure-association-rules.",
   126  			ViewQuery:   `SELECT * FROM internal.failure_association_rules WHERE project = "` + luciProject + `"`,
   127  			Labels:      map[string]string{bq.MetadataVersionKey: "1"},
   128  		}
   129  	},
   130  	"clustered_failures": func(luciProject string) *bigquery.TableMetadata {
   131  		// Revalidate project as safeguard against SQL-Injection.
   132  		if err := pbutil.ValidateProject(luciProject); err != nil {
   133  			panic(err)
   134  		}
   135  		return &bigquery.TableMetadata{
   136  			Description: "Clustered test failures for " + luciProject + ". Each failure is repeated for each cluster it is contained in.",
   137  			ViewQuery:   `SELECT * FROM internal.clustered_failures WHERE project = "` + luciProject + `"`,
   138  			Labels:      map[string]string{bq.MetadataVersionKey: "1"},
   139  		}
   140  	},
   141  	"cluster_summaries": func(luciProject string) *bigquery.TableMetadata {
   142  		// Revalidate project as safeguard against SQL-Injection.
   143  		if err := pbutil.ValidateProject(luciProject); err != nil {
   144  			panic(err)
   145  		}
   146  		return &bigquery.TableMetadata{
   147  			Description: "Test failure clusters for " + luciProject + " with cluster metrics. Periodically updated from clustered_failures table with ~15 minute staleness.",
   148  			ViewQuery:   `SELECT * FROM internal.cluster_summaries WHERE project = "` + luciProject + `"`,
   149  			Labels:      map[string]string{bq.MetadataVersionKey: "1"},
   150  		}
   151  	},
   152  	"test_verdicts": func(luciProject string) *bigquery.TableMetadata {
   153  		// Revalidate project as safeguard against SQL-Injection.
   154  		if err := pbutil.ValidateProject(luciProject); err != nil {
   155  			panic(err)
   156  		}
   157  		return &bigquery.TableMetadata{
   158  			Description: "Contains all test verdicts produced by " + luciProject + ". See go/luci-analysis-verdict-export-proposal.",
   159  			ViewQuery:   `SELECT * FROM internal.test_verdicts WHERE project = "` + luciProject + `"`,
   160  			Labels:      map[string]string{bq.MetadataVersionKey: "1"},
   161  		}
   162  	},
   163  	"test_variant_segments": func(luciProject string) *bigquery.TableMetadata {
   164  		// Revalidate project as safeguard against SQL-Injection.
   165  		if err := pbutil.ValidateProject(luciProject); err != nil {
   166  			panic(err)
   167  		}
   168  		return &bigquery.TableMetadata{
   169  			Description: "Contains test variant histories segmented by change point analysis. See go/luci-test-variant-analysis-design.",
   170  			ViewQuery:   `SELECT * FROM internal.test_variant_segments WHERE project = "` + luciProject + `"`,
   171  			Labels:      map[string]string{bq.MetadataVersionKey: "1"},
   172  		}
   173  	},
   174  	"test_variant_segments_unexpected_realtime": func(luciProject string) *bigquery.TableMetadata {
   175  		// Revalidate project as safeguard against SQL-Injection.
   176  		if err := pbutil.ValidateProject(luciProject); err != nil {
   177  			panic(err)
   178  		}
   179  		viewQuery := fmt.Sprintf(segmentsUnexpectedRealtimePerProjectQuery, luciProject)
   180  		return &bigquery.TableMetadata{
   181  			Description: "Contains test variant histories segmented by change point analysis, limited to test variants with unexpected" +
   182  				" results in postsubmit in the last 90 days. See go/luci-test-variant-analysis-design.",
   183  			ViewQuery: viewQuery,
   184  			Labels:    map[string]string{bq.MetadataVersionKey: "2"},
   185  		}
   186  	},
   187  }
   188  
   189  // CronHandler is then entry-point for the ensure views cron job.
   190  func CronHandler(ctx context.Context, gcpProject string) (retErr error) {
   191  	client, err := bqutil.Client(ctx, gcpProject)
   192  	if err != nil {
   193  		return errors.Annotate(err, "create bq client").Err()
   194  	}
   195  	defer func() {
   196  		if err := client.Close(); err != nil && retErr == nil {
   197  			retErr = errors.Annotate(err, "closing bq client").Err()
   198  		}
   199  	}()
   200  	if err := ensureViews(ctx, client); err != nil {
   201  		logging.Errorf(ctx, "ensure views: %s", err)
   202  		return err
   203  	}
   204  	return nil
   205  }
   206  
   207  func ensureViews(ctx context.Context, bqClient *bigquery.Client) error {
   208  	// Create views for individual datasets.
   209  	for datasetID, tableSpecs := range datasetViewQueries {
   210  		for tableName, spec := range tableSpecs {
   211  			table := bqClient.Dataset(datasetID).Table(tableName)
   212  			if err := bq.EnsureTable(ctx, table, spec, bq.UpdateMetadata(), bq.RefreshViewInterval(time.Hour)); err != nil {
   213  				return errors.Annotate(err, "ensure view %s", tableName).Err()
   214  			}
   215  		}
   216  	}
   217  	// Get datasets for LUCI projects.
   218  	datasetIDs, err := projectDatasets(ctx, bqClient)
   219  	if err != nil {
   220  		return errors.Annotate(err, "get LUCI project datasets").Err()
   221  	}
   222  	// Create views that is common to each LUCI project's dataset.
   223  	for _, projectDatasetID := range datasetIDs {
   224  		if err := createViewsForLUCIDataset(ctx, bqClient, projectDatasetID); err != nil {
   225  			return errors.Annotate(err, "ensure view for LUCI project dataset %s", projectDatasetID).Err()
   226  		}
   227  	}
   228  	return nil
   229  }
   230  
   231  // createViewsForLUCIDataset creates views with the given tableSpecs under the given datasetID
   232  func createViewsForLUCIDataset(ctx context.Context, bqClient *bigquery.Client, datasetID string) error {
   233  	luciProject, err := bqutil.ProjectForDataset(datasetID)
   234  	if err != nil {
   235  		return errors.Annotate(err, "get LUCI project with dataset name %s", datasetID).Err()
   236  	}
   237  	for tableName, specFunc := range luciProjectViewQueries {
   238  		table := bqClient.Dataset(datasetID).Table(tableName)
   239  		spec := specFunc(luciProject)
   240  		if err := bq.EnsureTable(ctx, table, spec, bq.UpdateMetadata(), bq.RefreshViewInterval(time.Hour)); err != nil {
   241  			return errors.Annotate(err, "ensure view %s", tableName).Err()
   242  		}
   243  	}
   244  	return nil
   245  }
   246  
   247  // projectDatasets returns all project datasets in the GCP Project.
   248  // E.g. "chromium", "chromeos", ....
   249  func projectDatasets(ctx context.Context, bqClient *bigquery.Client) ([]string, error) {
   250  	var datasets []string
   251  	di := bqClient.Datasets(ctx)
   252  	for {
   253  		d, err := di.Next()
   254  		if err == iterator.Done {
   255  			break
   256  		} else if err != nil {
   257  			return nil, err
   258  		}
   259  		// The internal dataset is a special dataset that does
   260  		// not belong to a LUCI project.
   261  		if strings.EqualFold(d.DatasetID, bqutil.InternalDatasetID) {
   262  			continue
   263  		}
   264  		// Same for the experiments dataset.
   265  		if strings.EqualFold(d.DatasetID, "experiments") {
   266  			continue
   267  		}
   268  		datasets = append(datasets, d.DatasetID)
   269  	}
   270  	return datasets, nil
   271  }