go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/luciexe/host/buildmerge/build_state.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package buildmerge
    16  
    17  import (
    18  	"bytes"
    19  	"compress/zlib"
    20  	"context"
    21  	"io"
    22  	"sync"
    23  
    24  	"google.golang.org/protobuf/proto"
    25  
    26  	bbpb "go.chromium.org/luci/buildbucket/proto"
    27  	"go.chromium.org/luci/common/errors"
    28  	"go.chromium.org/luci/common/sync/dispatcher"
    29  	"go.chromium.org/luci/common/sync/dispatcher/buffer"
    30  	"go.chromium.org/luci/logdog/api/logpb"
    31  	"go.chromium.org/luci/logdog/common/types"
    32  )
    33  
// buildState represents the current state of a single build.proto stream.
//
// buildStateTracker copies this struct by value when it needs to change it
// (see updateState and finalize) and then swaps in a pointer to the modified
// copy.
type buildState struct {
	// build holds the most recently processed Build state. This message should be
	// treated as immutable (i.e. proto.Clone before modifying it).
	//
	// This may be `nil` until the first user-supplied build.proto is processed,
	// or until the buildStateTracker closes.
	//
	// NOTE(review): updateState and finalize hand this same message to
	// setErrorOnBuild / processFinalBuild without cloning it first - confirm
	// that this is compatible with the immutability rule above.
	build *bbpb.Build

	// buildReadOnly holds the most recently processed Build state to read.
	// This message should be treated as immutable (i.e. proto.Clone before modifying it).
	//
	// It is filled in lazily by getLatestBuild as a proto.Clone of `build`, and
	// is reset to nil by updateState and finalize.
	buildReadOnly *bbpb.Build

	// closed is set to true when the build state is terminated and will receive
	// no more user updates (but may still need to be finalized; see finalize()).
	closed bool

	// final is set to true when the build state is closed and all final
	// processing has occurred on the build state.
	final bool

	// invalid is set to true when the interior structure (i.e. Steps) of the
	// latest build contains invalid data and shouldn't be inspected.
	//
	// updateState sets this (together with closed) when it is given an error.
	invalid bool
}
    59  
// buildStateTracker manages the state of a single build.proto datagram stream.
//
// Incoming datagrams are pushed through `work` (see handleNewData), parsed by
// parseAndSend, and the result is published as `latestState`.
type buildStateTracker struct {
	// ctx is the context given to newBuildStateTracker. handleNewData selects
	// on ctx.Done() while sending to work.C.
	ctx context.Context

	// The Agent that this buildStateTracker belongs to. Used to access:
	//   * clockNow
	//   * calculateURLs
	//   * informNewData
	merger *Agent

	// ldNamespace is the namespace passed to absolutizeURLs when fixing up log
	// URLs in parseBuild.
	ldNamespace types.StreamName

	// True iff we should expect zlib-compressed datagrams.
	zlib bool

	// We use this mutex to synchronize closure and sending operations on the work
	// channel; `work` is configured, if it's running, to immediately accept any
	// items pushed to it, so it's safe to hold this while sending on work.C.
	workMu sync.Mutex

	// The work channel is configured to only keep the latest incoming datagram.
	// Its send function parses and interprets the Build message.
	// Errors are not reported to the dispatcher.Channel, but are instead recorded
	// in the parsed Build state.
	//
	// This is left as the zero value when the tracker is created with an error
	// (see newBuildStateTracker), which is why closeWorkLocked and Drain check
	// its channels for nil.
	work       dispatcher.Channel
	workClosed bool // true if we've closed work.C, protected by workMu

	// latestStateMu guards latestState. updateState and finalize replace
	// latestState with a pointer to a modified copy; getLatestBuild fills in
	// latestState.buildReadOnly in place while holding the mutex.
	latestStateMu sync.Mutex
	latestState   *buildState
}
    90  
    91  // updateState updates `state` with the Build.proto message inside the lock.
    92  //
    93  // If there's an error when generating the new build - i.e. when parsing `data`
    94  // or an error in the decoded message's contents, `state.invalid` and
    95  // `state.closed` will be set to true, and `state.build` will be updated with
    96  // the error message.
    97  func (t *buildStateTracker) updateState(newBuild *bbpb.Build, err error) {
    98  	t.latestStateMu.Lock()
    99  	defer t.latestStateMu.Unlock()
   100  	state := *t.latestState
   101  	oldBuild := state.build
   102  
   103  	if state.closed {
   104  		return
   105  	}
   106  
   107  	if err != nil {
   108  		if newBuild == nil {
   109  			if oldBuild == nil {
   110  				newBuild = &bbpb.Build{}
   111  			} else {
   112  				newBuild = oldBuild
   113  			}
   114  		}
   115  		setErrorOnBuild(newBuild, err)
   116  		newBuild.UpdateTime = t.merger.clockNow()
   117  		state.closed = true
   118  		state.invalid = true
   119  	}
   120  
   121  	state.build = newBuild
   122  	// Reset buildReadOnly since we have a new build state now.
   123  	state.buildReadOnly = nil
   124  
   125  	if state.closed {
   126  		t.Close()
   127  	}
   128  
   129  	t.latestState = &state
   130  }
   131  
   132  // parseBuild parses `data` then returns the parsed Build.
   133  func (t *buildStateTracker) parseBuild(data []byte) (*bbpb.Build, error) {
   134  	if t.zlib {
   135  		z, err := zlib.NewReader(bytes.NewBuffer(data))
   136  		if err != nil {
   137  			return nil, errors.Annotate(err, "constructing decompressor for Build").Err()
   138  		}
   139  		data, err = io.ReadAll(z)
   140  		if err != nil {
   141  			return nil, errors.Annotate(err, "decompressing Build").Err()
   142  		}
   143  	}
   144  
   145  	parsedBuild := &bbpb.Build{}
   146  	if err := proto.Unmarshal(data, parsedBuild); err != nil {
   147  		return nil, errors.Annotate(err, "parsing Build").Err()
   148  	}
   149  
   150  	for _, step := range parsedBuild.Steps {
   151  		if len(step.Logs) > 0 && step.Logs[0].Name == "$build.proto" {
   152  			// convert incoming $build.proto logs to MergeBuild messages.
   153  			// If the step has both, then just discard the $build.proto log.
   154  			//
   155  			// TODO(crbug.com/1310155): Remove this conversion after everything
   156  			// emits MergeBuild messages natively.
   157  			if step.MergeBuild == nil {
   158  				step.MergeBuild = &bbpb.Step_MergeBuild{
   159  					FromLogdogStream: step.Logs[0].Url,
   160  				}
   161  			}
   162  			step.Logs = step.Logs[1:]
   163  		}
   164  		for _, log := range step.Logs {
   165  			var err error
   166  			log.Url, log.ViewUrl, err = absolutizeURLs(log.Url, log.ViewUrl, t.ldNamespace, t.merger.calculateURLs)
   167  			if err != nil {
   168  				step.Status = bbpb.Status_INFRA_FAILURE
   169  				step.SummaryMarkdown += err.Error()
   170  				return parsedBuild, errors.Annotate(err, "step[%q].logs[%q]", step.Name, log.Name).Err()
   171  			}
   172  		}
   173  		if mb := step.GetMergeBuild(); mb != nil && mb.FromLogdogStream != "" {
   174  			var err error
   175  			mb.FromLogdogStream, _, err = absolutizeURLs(mb.FromLogdogStream, "", t.ldNamespace, t.merger.calculateURLs)
   176  			if err != nil {
   177  				step.Status = bbpb.Status_INFRA_FAILURE
   178  				step.SummaryMarkdown += err.Error()
   179  				return parsedBuild, errors.Annotate(err, "step[%q].merge_build.from_logdog_stream", step.Name).Err()
   180  			}
   181  		}
   182  	}
   183  	for _, log := range parsedBuild.GetOutput().GetLogs() {
   184  		var err error
   185  		log.Url, log.ViewUrl, err = absolutizeURLs(log.Url, log.ViewUrl, t.ldNamespace, t.merger.calculateURLs)
   186  		if err != nil {
   187  			return parsedBuild, errors.Annotate(err, "build.output.logs[%q]", log.Name).Err()
   188  		}
   189  	}
   190  	parsedBuild.UpdateTime = t.merger.clockNow()
   191  	return parsedBuild, nil
   192  }
   193  
   194  // newBuildStateTracker produces a new buildStateTracker in the given logdog
   195  // namespace.
   196  //
   197  // `ctx` is used for cancellation/logging.
   198  //
   199  // `merger` is the Agent that this buildStateTracker belongs to. See the comment
   200  // in buildStateTracker for its use of this.
   201  //
   202  // `namespace` is the logdog namespace under which this build.proto is being
   203  // streamed from. e.g. if the updates to handleNewData are coming from a logdog
   204  // stream "a/b/c/build.proto", then `namespace` here should be "a/b/c". This is
   205  // used verbatim as the namespace argument to merger.calculateURLs.
   206  //
   207  // if `err` is provided, the buildStateTracker tracker is created in an errored
   208  // (closed) state where getLatest always returns a fixed Build in the
   209  // INFRA_FAILURE state with `err` reflected in the build's SummaryMarkdown
   210  // field.
   211  func newBuildStateTracker(ctx context.Context, merger *Agent, namespace types.StreamName, zlib bool, err error) *buildStateTracker {
   212  	ret := &buildStateTracker{
   213  		ctx:         ctx,
   214  		merger:      merger,
   215  		zlib:        zlib,
   216  		ldNamespace: namespace.AsNamespace(),
   217  		latestState: &buildState{},
   218  	}
   219  
   220  	if err != nil {
   221  		ret.latestState.build = &bbpb.Build{}
   222  		setErrorOnBuild(ret.latestState.build, err)
   223  		ret.finalize()
   224  		ret.Close()
   225  	} else {
   226  		ret.work, err = dispatcher.NewChannel(ctx, &dispatcher.Options{
   227  			Buffer: buffer.Options{
   228  				MaxLeases:     1,
   229  				BatchItemsMax: 1,
   230  				FullBehavior:  &buffer.DropOldestBatch{},
   231  			},
   232  			DropFn:    dispatcher.DropFnQuiet,
   233  			DrainedFn: ret.finalize,
   234  		}, ret.parseAndSend)
   235  		if err != nil {
   236  			panic(err) // creating dispatcher with static config should never fail
   237  		}
   238  		// Attach the cancelation of the context to the closure of work.C.
   239  		go func() {
   240  			select {
   241  			case <-ctx.Done():
   242  				ret.Close()
   243  			case <-ret.work.DrainC:
   244  				// already shut down w/o cancelation
   245  			}
   246  		}()
   247  	}
   248  
   249  	return ret
   250  }
   251  
   252  // finalized is called exactly once when either:
   253  //
   254  //   - newBuildStateTracker is called with err != nil
   255  //   - buildStateTracker.work is fully shut down (this is installed as
   256  //     dispatcher.Options.DrainedFn)
   257  func (t *buildStateTracker) finalize() {
   258  	t.latestStateMu.Lock()
   259  	defer t.latestStateMu.Unlock()
   260  
   261  	state := *t.latestState
   262  	if state.final {
   263  		panic("impossible; finalize called twice?")
   264  	}
   265  
   266  	state.closed = true
   267  	state.final = true
   268  	if state.build == nil {
   269  		state.build = &bbpb.Build{
   270  			SummaryMarkdown: "Never received any build data.",
   271  			Status:          bbpb.Status_INFRA_FAILURE,
   272  			Output: &bbpb.Build_Output{
   273  				Status:          bbpb.Status_INFRA_FAILURE,
   274  				SummaryMarkdown: "Never received any build data.",
   275  			},
   276  		}
   277  	}
   278  	processFinalBuild(t.merger.clockNow(), state.build)
   279  	state.buildReadOnly = nil
   280  	t.latestState = &state
   281  	t.merger.informNewData()
   282  }
   283  
   284  func (t *buildStateTracker) parseAndSend(data *buffer.Batch) error {
   285  	t.latestStateMu.Lock()
   286  	state := *t.latestState
   287  	t.latestStateMu.Unlock()
   288  
   289  	// already closed
   290  	if state.closed {
   291  		return nil
   292  	}
   293  
   294  	newBuild, err := t.parseBuild(data.Data[0].Item.([]byte))
   295  	// may set state.closed on an error
   296  	t.updateState(newBuild, err)
   297  
   298  	t.merger.informNewData()
   299  	return nil
   300  }
   301  
   302  // getLatestBuild returns the Build in the current state.
   303  //
   304  // It returns the internal read-only copy of the build to avoid the read/write race.
   305  func (t *buildStateTracker) getLatestBuild() *bbpb.Build {
   306  	t.latestStateMu.Lock()
   307  	defer t.latestStateMu.Unlock()
   308  
   309  	// Lazily clone the build to its read-only copy when needed.
   310  	if t.latestState.buildReadOnly == nil {
   311  		t.latestState.buildReadOnly = proto.Clone(t.latestState.build).(*bbpb.Build)
   312  	}
   313  	return t.latestState.buildReadOnly
   314  }
   315  
   316  // This implements the bundler.StreamChunkCallback callback function.
   317  //
   318  // Each call to `handleNewData` expects `entry` to have a complete (non-Partial)
   319  // datagram containing a single Build message. The message will (eventually) be
   320  // parsed and fixed up (e.g. fixing Log Url/ViewUrl), and become this
   321  // buildStateTracker's new state.
   322  //
   323  // This method does not block; Data here is submitted to the buildStateTracker's
   324  // internal worker, which processes state updates as quickly as it can, skipping
   325  // state updates which are submitted too rapidly.
   326  //
   327  // This method has no effect if the buildStateTracker is 'closed'.
   328  //
   329  // When this is called with `nil` as an argument (when the attached logdog
   330  // stream is closed), it will start the closure process on this
   331  // buildStateTracker. The final build state can be obtained synchronously by
   332  // calling GetFinal().
   333  func (t *buildStateTracker) handleNewData(entry *logpb.LogEntry) {
   334  	t.workMu.Lock()
   335  	defer t.workMu.Unlock()
   336  
   337  	if entry == nil {
   338  		t.closeWorkLocked()
   339  	} else if !t.workClosed {
   340  		select {
   341  		case t.work.C <- entry.GetDatagram().Data:
   342  		case <-t.ctx.Done():
   343  			t.closeWorkLocked()
   344  		}
   345  	}
   346  }
   347  
   348  func (t *buildStateTracker) closeWorkLocked() {
   349  	if !t.workClosed {
   350  		if t.work.C != nil {
   351  			close(t.work.C)
   352  		}
   353  		t.workClosed = true
   354  	}
   355  }
   356  
// Close closes the tracker's work channel, if it is not already closed.
//
// It acquires workMu and delegates to closeWorkLocked, so it may be called
// more than once.
func (t *buildStateTracker) Close() {
	t.workMu.Lock()
	defer t.workMu.Unlock()
	t.closeWorkLocked()
}
   362  
   363  // Drain waits for the build state to finalize.
   364  func (t *buildStateTracker) Drain() {
   365  	if t.work.DrainC != nil {
   366  		<-t.work.DrainC
   367  	}
   368  }