go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/bqutil/job.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bqutil
    16  
    17  import (
    18  	"context"
    19  	"time"
    20  
    21  	"cloud.google.com/go/bigquery"
    22  
    23  	"go.chromium.org/luci/common/errors"
    24  	"go.chromium.org/luci/common/logging"
    25  
    26  	"go.chromium.org/luci/analysis/internal/bugs"
    27  )
    28  
    29  // WaitForJob waits for a BigQuery job to finish.
    30  // If after timeout and the job has not finished, it will attempt
    31  // to cancel the job. The cancellation is based on best-effort,
    32  // so if there is an error, we just log instead of throwing the error.
    33  // This is to avoid jobs overrunning each other and triggering
    34  // a death spiral of write contention / starving each other of resources.
    35  // The actual timeout for bigquery job will be context timeout reduced
    36  // by 5 seconds. It is for the cancelling job to execute.
    37  // If the context does not have a deadline, the bigquery job will
    38  // have no timeout.
    39  func WaitForJob(ctx context.Context, job *bigquery.Job) (*bigquery.JobStatus, error) {
    40  	waitCtx, cancel := bugs.Shorten(ctx, time.Second*5)
    41  
    42  	defer func() {
    43  		// Cancel the waitCtx and release all resource.
    44  		cancel()
    45  
    46  		// Cancel the big query job if it has not finished.
    47  		js, err := job.Status(ctx)
    48  		if err != nil {
    49  			// Non critical, just log.
    50  			err = errors.Annotate(err, "get bigquery status").Err()
    51  			logging.Errorf(ctx, err.Error())
    52  			return
    53  		}
    54  		if !js.Done() {
    55  			err = job.Cancel(ctx)
    56  			if err != nil {
    57  				// Non critical, just log.
    58  				err = errors.Annotate(err, "cancel bigquery job").Err()
    59  				logging.Errorf(ctx, err.Error())
    60  			}
    61  		}
    62  	}()
    63  
    64  	js, err := job.Wait(waitCtx)
    65  	if err != nil {
    66  		return nil, errors.Annotate(err, "wait for job").Err()
    67  	}
    68  	return js, nil
    69  }