go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/pubsub/buildbucket.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package pubsub handles pub/sub messages
    16  package pubsub
    17  
    18  import (
    19  	"bytes"
    20  	"compress/zlib"
    21  	"context"
    22  	"encoding/json"
    23  	"fmt"
    24  	"io"
    25  	"net/http"
    26  
    27  	"google.golang.org/protobuf/encoding/protojson"
    28  	"google.golang.org/protobuf/proto"
    29  
    30  	"go.chromium.org/luci/bisection/compilefailuredetection"
    31  	"go.chromium.org/luci/bisection/internal/config"
    32  	"go.chromium.org/luci/bisection/metrics"
    33  	"go.chromium.org/luci/bisection/rerun"
    34  	taskpb "go.chromium.org/luci/bisection/task/proto"
    35  	"go.chromium.org/luci/bisection/util"
    36  	"go.chromium.org/luci/bisection/util/loggingutil"
    37  	buildbucketpb "go.chromium.org/luci/buildbucket/proto"
    38  	"go.chromium.org/luci/common/errors"
    39  	"go.chromium.org/luci/common/logging"
    40  	"go.chromium.org/luci/common/retry/transient"
    41  	"go.chromium.org/luci/common/tsmon/field"
    42  	"go.chromium.org/luci/common/tsmon/metric"
    43  	"go.chromium.org/luci/server/router"
    44  	"go.chromium.org/luci/server/tq"
    45  )
    46  
var (
	// bbCounter counts the Buildbucket pub/sub messages ingested by this
	// package. It is incremented once for every v2 message that reaches one of
	// the OutcomeType outcomes; messages that fail with an error or that are
	// not v2 are not counted.
	bbCounter = metric.NewCounter(
		"bisection/ingestion/buildbucket",
		"The number of Buildbucket pubsub received, by project and outcome.",
		nil,
		// The LUCI Project.
		field.String("project"),
		// The outcome action of the ingestion, i.e. one of the OutcomeType values:
		// "unsupported", "update_rerun", "update_succeeded_build", "ignore", "analyze"
		field.String("outcome"),
	)
	// rerunCounter counts rerun build results. It is incremented for each
	// message about the project's compile or test rerun builder whose build
	// status has the Status_ENDED_MASK bits set.
	rerunCounter = metric.NewCounter(
		"bisection/ingestion/rerun",
		"The number of rerun build result, by project, status and type.",
		nil,
		// The LUCI Project.
		field.String("project"),
		// The status of the rerun build, as rendered by Status.String().
		// The possible values are "SUCCESS", "FAILURE", "INFRA_FAILURE", "CANCELED".
		field.String("status"),
		// The type of the analysis that rerun belongs to.
		// The possible values are "compile", "test".
		field.String("type"),
	)
)
    72  
// OutcomeType is the value of the "outcome" field of the Buildbucket ingestion
// counter sent to tsmon. It records what the handler decided to do with a
// pub/sub message.
type OutcomeType string

// Outcomes recorded by the Buildbucket pub/sub handler in this file:
//   - OutcomeTypeUnsupported: the build is not in chromium/ci, or its builder
//     group is excluded from compile analysis.
//   - OutcomeTypeUpdateRerun: the build belongs to the project's configured
//     compile or test rerun builder.
//   - OutcomeTypeUpdateSucceededBuild: a supported build ended with SUCCESS.
//   - OutcomeTypeIgnore: a supported build whose status is neither SUCCESS
//     nor FAILURE.
//   - OutcomeTypeAnalyze: a supported build ended with FAILURE and a failed
//     build ingestion task was enqueued for it.
const (
	OutcomeTypeUnsupported          OutcomeType = "unsupported"
	OutcomeTypeUpdateRerun          OutcomeType = "update_rerun"
	OutcomeTypeUpdateSucceededBuild OutcomeType = "update_succeeded_build"
	OutcomeTypeIgnore               OutcomeType = "ignore"
	OutcomeTypeAnalyze              OutcomeType = "analyze"
)
    83  
// pubsubMessage is the JSON envelope of a pub/sub push request, as decoded
// from the HTTP request body with encoding/json. Only the parts this package
// reads are declared:
//   - Message.Data is the message payload; for v2 messages it holds the JSON
//     form of a BuildsV2PubSub proto. Being a []byte, encoding/json expects
//     it as a base64-encoded JSON string.
//   - Message.Attributes are the message attributes; a "version" attribute
//     equal to "v2" marks messages from the `builds_v2` topic.
type pubsubMessage struct {
	Message struct {
		Data       []byte
		Attributes map[string]any
	}
}
    90  
    91  // BuildbucketPubSubHandler handles pub/sub messages from buildbucket
    92  func BuildbucketPubSubHandler(ctx *router.Context) {
    93  	if err := buildbucketPubSubHandlerImpl(ctx.Request.Context(), ctx.Request); err != nil {
    94  		logging.Errorf(ctx.Request.Context(), "Error processing buildbucket pubsub message: %s", err)
    95  		processError(ctx, err)
    96  		return
    97  	}
    98  	// Just returns OK here so pubsub does not resend the message
    99  	ctx.Writer.WriteHeader(http.StatusOK)
   100  }
   101  
   102  func processError(ctx *router.Context, err error) {
   103  	if transient.Tag.In(err) {
   104  		// Pubsub will retry this
   105  		ctx.Writer.WriteHeader(http.StatusInternalServerError)
   106  	} else {
   107  		// Pubsub will not retry those errors
   108  		ctx.Writer.WriteHeader(http.StatusAccepted)
   109  	}
   110  }
   111  
   112  func buildbucketPubSubHandlerImpl(c context.Context, r *http.Request) error {
   113  	var psMsg pubsubMessage
   114  	if err := json.NewDecoder(r.Body).Decode(&psMsg); err != nil {
   115  		return errors.Annotate(err, "could not decode message").Err()
   116  	}
   117  
   118  	// Handle the message from `builds_v2` pubsub topic.
   119  	if v, ok := psMsg.Message.Attributes["version"].(string); ok && v == "v2" {
   120  		logging.Debugf(c, "Got message from v2")
   121  		bbmsg, err := parseBBV2Message(c, psMsg)
   122  		if err != nil {
   123  			return errors.Annotate(err, "unmarshal buildbucket v2 pub/sub message").Err()
   124  		}
   125  
   126  		bbid := bbmsg.GetBuild().GetId()
   127  		project := bbmsg.GetBuild().GetBuilder().GetProject()
   128  		bucket := bbmsg.GetBuild().GetBuilder().GetBucket()
   129  		builder := bbmsg.GetBuild().GetBuilder().GetBuilder()
   130  		status := bbmsg.GetBuild().GetStatus()
   131  
   132  		c = loggingutil.SetAnalyzedBBID(c, bbid)
   133  		logging.Debugf(c, "Received message for build id %d", bbid)
   134  
   135  		// Special handling for pubsub message for compile failure for
   136  		// LUCI Bisection.
   137  		// This is only triggered for rerun builds.
   138  		compileBuilder, err := config.GetCompileBuilder(c, project)
   139  		if err != nil {
   140  			// If there are no configs for the project, just ignore.
   141  			if !errors.Is(err, config.ErrNotFoundProjectConfig) {
   142  				return errors.Annotate(err, "get compile builder").Err()
   143  			}
   144  		} else {
   145  			if bucket == compileBuilder.Bucket && builder == compileBuilder.Builder {
   146  				logging.Infof(c, "Received pubsub for luci bisection compile rerun build %d status %s", bbid, buildbucketpb.Status_name[int32(status)])
   147  				bbCounter.Add(c, 1, project, string(OutcomeTypeUpdateRerun))
   148  
   149  				// We only update the rerun counter after the build finished.
   150  				// Status_ENDED_MASK is a union of all terminal statuses.
   151  				if status&buildbucketpb.Status_ENDED_MASK == buildbucketpb.Status_ENDED_MASK {
   152  					rerunCounter.Add(c, 1, project, status.String(), string(metrics.AnalysisTypeCompile))
   153  				}
   154  
   155  				if bbmsg.Build.Status != buildbucketpb.Status_SCHEDULED {
   156  					return rerun.UpdateCompileRerunStatus(c, bbid)
   157  				}
   158  				return nil
   159  			}
   160  		}
   161  
   162  		// Handle test rerun build.
   163  		testBuilder, err := config.GetTestBuilder(c, project)
   164  		if err != nil {
   165  			// If there are no configs for the project, just ignore.
   166  			if !errors.Is(err, config.ErrNotFoundProjectConfig) {
   167  				return errors.Annotate(err, "get test builder").Err()
   168  			}
   169  		} else {
   170  			if bucket == testBuilder.Bucket && builder == testBuilder.Builder {
   171  				logging.Infof(c, "Test bisection: received pubsub for rerun build %d status %s", bbid, buildbucketpb.Status_name[int32(status)])
   172  				bbCounter.Add(c, 1, project, string(OutcomeTypeUpdateRerun))
   173  
   174  				// We only update the rerun counter after the build finished.
   175  				// Status_ENDED_MASK is a union of all terminal statuses.
   176  				if status&buildbucketpb.Status_ENDED_MASK == buildbucketpb.Status_ENDED_MASK {
   177  					rerunCounter.Add(c, 1, project, status.String(), string(metrics.AnalysisTypeTest))
   178  				}
   179  
   180  				if bbmsg.Build.Status != buildbucketpb.Status_SCHEDULED {
   181  					return rerun.UpdateTestRerunStatus(c, bbmsg.GetBuild())
   182  				}
   183  				return nil
   184  			}
   185  		}
   186  
   187  		// For now, we only handle chromium/ci builds
   188  		// TODO (nqmtuan): Move this into config
   189  		if !(project == "chromium" && bucket == "ci") {
   190  			logging.Debugf(c, "Unsupported build for bucket (%q, %q). Exiting early...", project, bucket)
   191  			bbCounter.Add(c, 1, project, string(OutcomeTypeUnsupported))
   192  			return nil
   193  		}
   194  
   195  		excludedBgs, err := config.GetExcludedBuilderGroupsForCompile(c, project)
   196  		if err != nil {
   197  			return errors.Annotate(err, "get excluded builder groups for compile").Err()
   198  		}
   199  		// Pubsub message stores input properties in large fields.
   200  		largeFieldsData, err := zlibDecompress(bbmsg.BuildLargeFields)
   201  		if err != nil {
   202  			return errors.Annotate(err, "decompress large field").Err()
   203  		}
   204  		largeFields := &buildbucketpb.Build{}
   205  		if err := proto.Unmarshal(largeFieldsData, largeFields); err != nil {
   206  			return errors.Annotate(err, "unmarshal large field").Err()
   207  		}
   208  
   209  		builderGroup := util.GetBuilderGroup(largeFields)
   210  		if builderGroup != "" {
   211  			for _, excludedBg := range excludedBgs {
   212  				if builderGroup == excludedBg {
   213  					logging.Debugf(c, "Builder group is excluded %s. Exiting early...", builderGroup)
   214  					bbCounter.Add(c, 1, project, string(OutcomeTypeUnsupported))
   215  					return nil
   216  				}
   217  			}
   218  		}
   219  
   220  		// Just ignore non-successful and non-failed builds
   221  		if status != buildbucketpb.Status_SUCCESS && status != buildbucketpb.Status_FAILURE {
   222  			logging.Debugf(c, "Build status = %s. Exiting early...", status)
   223  			bbCounter.Add(c, 1, project, string(OutcomeTypeIgnore))
   224  			return nil
   225  		}
   226  
   227  		// If the build is succeeded -> some running analysis may not be necessary
   228  		if bbmsg.Build.Status == buildbucketpb.Status_SUCCESS {
   229  			bbCounter.Add(c, 1, project, string(OutcomeTypeUpdateSucceededBuild))
   230  			err := compilefailuredetection.UpdateSucceededBuild(c, bbid)
   231  			if err != nil {
   232  				return errors.Annotate(err, "UpdateSucceededBuild").Err()
   233  			}
   234  			return nil
   235  		}
   236  
   237  		// Create a task for task queue
   238  		err = tq.AddTask(c, &tq.Task{
   239  			Title: fmt.Sprintf("failed_build_%d", bbid),
   240  			Payload: &taskpb.FailedBuildIngestionTask{
   241  				Bbid: bbid,
   242  			},
   243  		})
   244  
   245  		if err != nil {
   246  			logging.Errorf(c, "Failed creating task in task queue for build %d", bbid)
   247  			return err
   248  		}
   249  		bbCounter.Add(c, 1, project, string(OutcomeTypeAnalyze))
   250  	}
   251  	return nil
   252  }
   253  
   254  func parseBBV2Message(ctx context.Context, pbMsg pubsubMessage) (*buildbucketpb.BuildsV2PubSub, error) {
   255  	buildsV2Msg := &buildbucketpb.BuildsV2PubSub{}
   256  	opts := protojson.UnmarshalOptions{AllowPartial: true, DiscardUnknown: true}
   257  	if err := opts.Unmarshal(pbMsg.Message.Data, buildsV2Msg); err != nil {
   258  		return nil, err
   259  	}
   260  	return buildsV2Msg, nil
   261  }
   262  
   263  // zlibDecompress decompresses data using zlib.
   264  func zlibDecompress(compressed []byte) ([]byte, error) {
   265  	r, err := zlib.NewReader(bytes.NewReader(compressed))
   266  	if err != nil {
   267  		return nil, err
   268  	}
   269  	defer func() { _ = r.Close() }()
   270  	return io.ReadAll(r)
   271  }