go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/bqexporter/bqexporter.go

go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/bqexporter/bqexporter.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package bqexporter handles export to BigQuery.
    16  package bqexporter
    17  
    18  import (
    19  	"context"
    20  	"time"
    21  
    22  	"go.chromium.org/luci/bisection/model"
    23  	bqpb "go.chromium.org/luci/bisection/proto/bq"
    24  	pb "go.chromium.org/luci/bisection/proto/v1"
    25  	"go.chromium.org/luci/bisection/util/bqutil"
    26  	"go.chromium.org/luci/bisection/util/datastoreutil"
    27  	"go.chromium.org/luci/common/clock"
    28  	"go.chromium.org/luci/common/errors"
    29  	"go.chromium.org/luci/common/logging"
    30  	"go.chromium.org/luci/gae/service/datastore"
    31  	"go.chromium.org/luci/gae/service/info"
    32  )
    33  
// daysToLookBack is the number of days to look back for past analyses.
// Only analyses created within the past 14 days are queried and exported.
const daysToLookBack = 14
    37  
    38  // ExportTestAnalyses exports test failure analyses to BigQuery.
    39  // A test failure analysis will be exported if it satisfies the following conditions:
    40  //  1. It has not been exported yet.
    41  //  2. It was created within the past 14 days.
    42  //  3. Has ended.
    43  //  4. If it found culprit, then actions must have been taken.
    44  //
    45  // The limit of 14 days is chosen to save the query time. It is also because if the exporter
    46  // is broken for some reasons, we will have 14 days to fix it.
    47  func ExportTestAnalyses(ctx context.Context) error {
    48  	// TODO (nqmtuan): We should read it from config.
    49  	// But currently we only have per-project config, not service config.
    50  	// So for now we are hard-coding it.
    51  	if !isEnabled(ctx) {
    52  		logging.Warningf(ctx, "export test analyses is not enabled")
    53  	}
    54  
    55  	client, err := NewClient(ctx, info.AppID(ctx))
    56  	if err != nil {
    57  		return errors.Annotate(err, "new client").Err()
    58  	}
    59  	defer client.Close()
    60  	err = export(ctx, client)
    61  	if err != nil {
    62  		return errors.Annotate(err, "export").Err()
    63  	}
    64  	return nil
    65  }
    66  
// ExportClient is the set of operations that export needs from a BigQuery
// client. Keeping it as an interface lets export be driven by any
// implementation that provides these three methods.
type ExportClient interface {
	// EnsureSchema is called by export first, before any rows are read or
	// inserted; a non-nil error aborts the export.
	// NOTE(review): presumably this makes sure the destination table schema
	// is up to date — confirm against the implementation.
	EnsureSchema(ctx context.Context) error
	// Insert is called by export with the rows built for the analyses that
	// do not yet have a row in BigQuery.
	Insert(ctx context.Context, rows []*bqpb.TestAnalysisRow) error
	// ReadTestFailureAnalysisRows returns the rows that export treats as
	// already exported; their AnalysisID values are used to skip analyses.
	ReadTestFailureAnalysisRows(ctx context.Context) ([]*TestFailureAnalysisRow, error)
}
    72  
    73  func export(ctx context.Context, client ExportClient) error {
    74  	err := client.EnsureSchema(ctx)
    75  	if err != nil {
    76  		return errors.Annotate(err, "ensure schema").Err()
    77  	}
    78  
    79  	analyses, err := fetchTestAnalyses(ctx)
    80  	if err != nil {
    81  		return errors.Annotate(err, "fetch test analyses").Err()
    82  	}
    83  	logging.Infof(ctx, "There are %d test analyses fetched from datastore", len(analyses))
    84  
    85  	// Read existing rows from bigquery.
    86  	bqrows, err := client.ReadTestFailureAnalysisRows(ctx)
    87  	if err != nil {
    88  		return errors.Annotate(err, "read test failure analysis rows").Err()
    89  	}
    90  	logging.Infof(ctx, "There are %d existing rows in BigQuery", len(bqrows))
    91  
    92  	// Filter out existing rows.
    93  	// Construct a map for fast filtering.
    94  	existingIDs := map[int64]bool{}
    95  	for _, r := range bqrows {
    96  		existingIDs[r.AnalysisID] = true
    97  	}
    98  
    99  	// Construct BQ rows.
   100  	rowsToInsert := []*bqpb.TestAnalysisRow{}
   101  	for _, tfa := range analyses {
   102  		if _, ok := existingIDs[tfa.ID]; !ok {
   103  			row, err := bqutil.TestFailureAnalysisToBqRow(ctx, tfa)
   104  			if err != nil {
   105  				return errors.Annotate(err, "test failure analysis to bq row for analysis ID: %d", tfa.ID).Err()
   106  			}
   107  			rowsToInsert = append(rowsToInsert, row)
   108  		}
   109  	}
   110  	logging.Infof(ctx, "After filtering, there are %d rows to insert to BigQuery.", len(rowsToInsert))
   111  
   112  	// Insert into BQ.
   113  	err = client.Insert(ctx, rowsToInsert)
   114  	if err != nil {
   115  		return errors.Annotate(err, "insert").Err()
   116  	}
   117  	return nil
   118  }
   119  
   120  // fetchTestAnalyses returns the test analyses that:
   121  // - Created within 14 days
   122  // - Has ended
   123  // - If it found a culprit, then either the actions have been taken,
   124  // or the it has ended more than 1 day ago.
   125  func fetchTestAnalyses(ctx context.Context) ([]*model.TestFailureAnalysis, error) {
   126  	// Query all analyses within 14 days.
   127  	cutoffTime := clock.Now(ctx).Add(-time.Hour * 24 * daysToLookBack)
   128  	q := datastore.NewQuery("TestFailureAnalysis").Gt("create_time", cutoffTime).Order("-create_time")
   129  	analyses := []*model.TestFailureAnalysis{}
   130  	err := datastore.GetAll(ctx, q, &analyses)
   131  	if err != nil {
   132  		return nil, errors.Annotate(err, "get test analyses").Err()
   133  	}
   134  
   135  	// Check that the analyses ended and actions were taken.
   136  	results := []*model.TestFailureAnalysis{}
   137  	for _, tfa := range analyses {
   138  		// Ignore all analyses that have not ended.
   139  		if !tfa.HasEnded() {
   140  			continue
   141  		}
   142  		// If the analyses did not find any culprit, then we don't
   143  		// need to check for culprit actions.
   144  		if tfa.Status != pb.AnalysisStatus_FOUND {
   145  			results = append(results, tfa)
   146  			continue
   147  		}
   148  
   149  		//Get culprit.
   150  		culprit, err := datastoreutil.GetVerifiedCulpritForTestAnalysis(ctx, tfa)
   151  		if err != nil {
   152  			return nil, errors.Annotate(err, "get verified culprit").Err()
   153  		}
   154  		if culprit == nil {
   155  			return nil, errors.Reason("no culprit found for analysis %d", tfa.ID).Err()
   156  		}
   157  
   158  		// Make an exception: If an analysis ended more than 1 day ago, and
   159  		// HasTakenActions is still set to false, most likely something was stuck
   160  		// that prevent the filed from being set. In this case, we want to
   161  		// export the analysis anyway, since there will be no changes to it.
   162  		// It also let us export the analyses without suspect's HasTakenActions field set.
   163  		oneDayAgo := clock.Now(ctx).Add(-time.Hour * 24)
   164  		if !culprit.HasTakenActions && tfa.EndTime.Before(oneDayAgo) {
   165  			// Logging for visibility.
   166  			logging.Warningf(ctx, "Analysis %d has ended more than a day ago, but actions are not taken", tfa.ID)
   167  		}
   168  
   169  		if culprit.HasTakenActions || tfa.EndTime.Before(oneDayAgo) {
   170  			results = append(results, tfa)
   171  		}
   172  	}
   173  	return results, nil
   174  }
   175  
   176  func isEnabled(ctx context.Context) bool {
   177  	// return info.AppID(ctx) == "luci-bisection-dev"
   178  	return true
   179  }