go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/bqexporter/merge_table.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bqexporter
    16  
    17  import (
    18  	"context"
    19  	"time"
    20  
    21  	"cloud.google.com/go/bigquery"
    22  
    23  	"go.chromium.org/luci/common/errors"
    24  	"go.chromium.org/luci/common/logging"
    25  
    26  	"go.chromium.org/luci/analysis/internal/bqutil"
    27  	"go.chromium.org/luci/analysis/internal/config"
    28  )
    29  
    30  // MergeTables is the entry point of the merge-test-variant-branches cron job.
    31  // It runs use DML merge to merge the data from test_variant_segment_updates
    32  // table to test_variant_segments table.
    33  func MergeTables(ctx context.Context, gcpProject string) (retErr error) {
    34  	enabled, err := shouldMergeTable(ctx)
    35  	if err != nil {
    36  		return err
    37  	}
    38  	if !enabled {
    39  		logging.Infof(ctx, "Skipped because export is not enabled")
    40  		return nil
    41  	}
    42  
    43  	client, err := bqutil.Client(ctx, gcpProject)
    44  	if err != nil {
    45  		return errors.Annotate(err, "create bq client").Err()
    46  	}
    47  	defer func() {
    48  		if err := client.Close(); err != nil && retErr == nil {
    49  			retErr = errors.Annotate(err, "closing bq client").Err()
    50  		}
    51  	}()
    52  
    53  	err = ensureTestVariantSegmentsSchema(ctx, client)
    54  	if err != nil {
    55  		return errors.Annotate(err, "ensure schema").Err()
    56  	}
    57  
    58  	// DML merge from test-variant-segment-updates to test-variant-segments table.
    59  	err = runDMLMerge(ctx, client)
    60  	if err != nil {
    61  		return errors.Annotate(err, "run DML merge").Err()
    62  	}
    63  
    64  	return nil
    65  }
    66  
    67  // runDMLMerge merges data from test_variant_segment_updates table to
    68  // test_variant_segments table.
    69  func runDMLMerge(ctx context.Context, client *bigquery.Client) error {
    70  	q := client.Query(`
    71  		MERGE test_variant_segments T
    72  			USING (
    73  				SELECT
    74  					ARRAY_AGG(u ORDER BY version DESC LIMIT 1)[OFFSET(0)] as row
    75  				FROM test_variant_segment_updates u
    76  				GROUP BY project, test_id, variant_hash, ref_hash
    77  			) S
    78  		ON T.project = S.row.project AND T.test_id = S.row.test_id AND T.variant_hash = S.row.variant_hash AND T.ref_hash = S.row.ref_hash
    79  		-- Row in source is newer than target, update.
    80  		WHEN MATCHED AND S.row.version > T.version THEN
    81  			UPDATE SET T.has_recent_unexpected_results = S.row.has_recent_unexpected_results, T.segments = S.row.segments, T.version=S.row.version
    82  		-- Row in source that does not exist in target. Insert.
    83  		WHEN NOT MATCHED BY TARGET THEN
    84  			INSERT (project, test_id, variant_hash, ref_hash, variant, ref, segments, has_recent_unexpected_results, version) VALUES (S.row.project, S.row.test_id, S.row.variant_hash, S.row.ref_hash, S.row.variant, S.row.ref, S.row.segments, S.row.has_recent_unexpected_results, S.row.version)
    85  		-- Delete rows from target older than 90 days that are not being updated.
    86  		WHEN NOT MATCHED BY SOURCE AND T.version < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 90 DAY) THEN
    87  			DELETE;
    88  		-- If the merge is successful, delete data from test_variant_segment_updates table.
    89  		-- We are conservative and only delete the data older than 20 minutes ago.
    90  		-- It may result in some duplication among merges, but it should not affect
    91  		-- the correctness of data.
    92  		DELETE FROM test_variant_segment_updates
    93  			WHERE version < TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 20 MINUTE);
    94  	`)
    95  	q.DefaultDatasetID = bqutil.InternalDatasetID
    96  
    97  	job, err := q.Run(ctx)
    98  	if err != nil {
    99  		return errors.Annotate(err, "initiate merge query").Err()
   100  	}
   101  
   102  	waitCtx, cancel := context.WithTimeout(ctx, time.Minute*9)
   103  	defer cancel()
   104  	js, err := bqutil.WaitForJob(waitCtx, job)
   105  	if err != nil {
   106  		return errors.Annotate(err, "waiting for merging to complete").Err()
   107  	}
   108  	if js.Err() != nil {
   109  		return errors.Annotate(js.Err(), "merge rows failed").Err()
   110  	}
   111  	return nil
   112  }
   113  
   114  func ensureTestVariantSegmentsSchema(ctx context.Context, client *bigquery.Client) error {
   115  	table := client.Dataset(bqutil.InternalDatasetID).Table(stableTableName)
   116  	if err := schemaApplyer.EnsureTable(ctx, table, tableMetadata); err != nil {
   117  		return errors.Annotate(err, "ensuring test_variant_segments table").Err()
   118  	}
   119  	return nil
   120  }
   121  
   122  // shouldMergeTable checks config to see whether bq exporter is enabled.
   123  func shouldMergeTable(ctx context.Context) (bool, error) {
   124  	cfg, err := config.Get(ctx)
   125  	if err != nil {
   126  		return false, errors.Annotate(err, "read config").Err()
   127  	}
   128  	if !cfg.GetTestVariantAnalysis().GetEnabled() {
   129  		return false, nil
   130  	}
   131  	if !cfg.GetTestVariantAnalysis().GetBigqueryExportEnabled() {
   132  		return false, nil
   133  	}
   134  	return true, nil
   135  }