go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/buildbucket/appengine/model/details.go

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package model
    16  
    17  import (
    18  	"bytes"
    19  	"compress/zlib"
    20  	"context"
    21  	"io"
    22  
    23  	"google.golang.org/protobuf/proto"
    24  	"google.golang.org/protobuf/types/known/structpb"
    25  	"google.golang.org/protobuf/types/known/timestamppb"
    26  
    27  	"go.chromium.org/luci/common/errors"
    28  	"go.chromium.org/luci/common/sync/parallel"
    29  	"go.chromium.org/luci/gae/service/datastore"
    30  
    31  	"go.chromium.org/luci/buildbucket/appengine/internal/compression"
    32  	pb "go.chromium.org/luci/buildbucket/proto"
    33  	"go.chromium.org/luci/buildbucket/protoutil"
    34  )
    35  
    36  const (
    37  	// BuildStepsKind is a BuildSteps entity's kind in the datastore.
    38  	BuildStepsKind = "BuildSteps"
    39  
    40  	// BuildInfraKind is a BuildInfra entity's kind in the datastore.
    41  	BuildInfraKind = "BuildInfra"
    42  
    43  	// BuildInputPropertiesKind is a BuildInputProperties entity's kind in the datastore.
    44  	BuildInputPropertiesKind = "BuildInputProperties"
    45  
    46  	// BuildOutputPropertiesKind is a BuildOutputProperties entity's kind in the datastore.
    47  	BuildOutputPropertiesKind = "BuildOutputProperties"
    48  )
    49  
    50  // maxPropertySize is the maximum property size. Any property larger than this
    51  // should be chunked into multiple entities to fit under the Datastore size limit.
    52  // This value is smaller than the real Datastore limit (1048487 bytes) in order
    53  // to give some headroom.
    54  // Note: this is a var instead of a const so that unit tests can lower it.
    55  // Otherwise, tests would use too much memory when covering cases such as
    56  // compressed property bytes spanning more than 4 chunks.
    57  var maxPropertySize = 1000 * 1000
    58  
    59  // defaultStructValues replaces nil values and values with no kind inside the
    60  // given structpb.Struct with explicit null values. Needed because a
    61  // structpb.Value cannot be marshaled to JSON unless its kind is set.
    62  func defaultStructValues(s *structpb.Struct) {
    63  	for k, v := range s.GetFields() {
    64  		switch {
    65  		case v == nil:
    66  			s.Fields[k] = &structpb.Value{
    67  				Kind: &structpb.Value_NullValue{},
    68  			}
    69  		case v.Kind == nil:
    70  			v.Kind = &structpb.Value_NullValue{}
    71  		case v.GetStructValue() != nil:
    72  			defaultStructValues(v.GetStructValue())
    73  		}
    74  	}
    75  }
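
// exampleDefaultStructValues is an illustrative sketch (not part of the
// original file) of what defaultStructValues does: a field whose value is nil
// or has no kind gains an explicit NullValue kind so the Struct can be
// marshaled to JSON. The struct contents below are hypothetical.
func exampleDefaultStructValues() {
	s := &structpb.Struct{Fields: map[string]*structpb.Value{
		"nil_value": nil,                        // replaced with a Value whose Kind is NullValue
		"no_kind":   {},                         // gains Kind: &structpb.Value_NullValue{}
		"number":    structpb.NewNumberValue(1), // already has a kind; left untouched
	}}
	defaultStructValues(s)
}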
    76  
    77  // BuildInfra is a representation of a build proto's infra field
    78  // in the datastore.
    79  type BuildInfra struct {
    80  	_kind string `gae:"$kind,BuildInfra"`
    81  	// ID is always 1 because only one such entity exists per Build.
    82  	ID int `gae:"$id,1"`
    83  	// Build is the key for the build this entity belongs to.
    84  	Build *datastore.Key `gae:"$parent"`
    85  	// Proto is the pb.BuildInfra proto representation of the infra field.
    86  	Proto *pb.BuildInfra `gae:"infra,legacy"`
    87  }
    88  
    89  var _ datastore.PropertyLoadSaver = (*BuildInfra)(nil)
    90  
    91  // Load implements datastore.PropertyLoadSaver in order to apply
    92  // defaultStructValues to bi.Proto.
    93  func (bi *BuildInfra) Load(pm datastore.PropertyMap) error {
    94  	if err := datastore.GetPLS(bi).Load(pm); err != nil {
    95  		return err
    96  	}
    97  	if bi.Proto.GetBuildbucket() != nil {
    98  		defaultStructValues(bi.Proto.Buildbucket.RequestedProperties)
    99  	}
   100  	return nil
   101  }
   102  
   103  // Save implements datastore.PropertyLoadSaver.
   104  func (bi *BuildInfra) Save(withMeta bool) (datastore.PropertyMap, error) {
   105  	return datastore.GetPLS(bi).Save(withMeta)
   106  }
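
// getBuildInfraExample is an illustrative sketch (not part of the original
// file): BuildInfra lives under its parent Build entity, so a lookup needs
// only the Build key. The build ID 123 is hypothetical.
func getBuildInfraExample(ctx context.Context) (*pb.BuildInfra, error) {
	infra := &BuildInfra{Build: datastore.KeyForObj(ctx, &Build{ID: 123})}
	// datastore.Get goes through Load above, so RequestedProperties comes back
	// with defaulted struct values.
	if err := datastore.Get(ctx, infra); err != nil {
		return nil, err
	}
	return infra.Proto, nil
}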
   107  
   108  // BuildInputProperties is a representation of a build proto's input field's
   109  // properties field in the datastore.
   110  type BuildInputProperties struct {
   111  	_kind string `gae:"$kind,BuildInputProperties"`
   112  	// ID is always 1 because only one such entity exists per Build.
   113  	ID int `gae:"$id,1"`
   114  	// Build is the key for the build this entity belongs to.
   115  	Build *datastore.Key `gae:"$parent"`
   116  	// Proto is the structpb.Struct representation of the properties field.
   117  	Proto *structpb.Struct `gae:"properties,legacy"`
   118  }
   119  
   120  // BuildOutputProperties is a representation of a build proto's output field's
   121  // properties field in the datastore.
   122  //
   123  // Note: avoid accessing BuildOutputProperties directly via datastore.Get and
   124  // datastore.Put, as its properties may be chunked if they exceed maxPropertySize.
   125  // Always use *BuildOutputProperties.Get and *BuildOutputProperties.Put instead.
   126  type BuildOutputProperties struct {
   127  	_     datastore.PropertyMap `gae:"-,extra"`
   128  	_kind string                `gae:"$kind,BuildOutputProperties"`
   129  	// _id is always 1 because only one such entity exists per Build.
   130  	_id int `gae:"$id,1"`
   131  	// Build is the key for the build this entity belongs to.
   132  	Build *datastore.Key `gae:"$parent"`
   133  	// Proto is the structpb.Struct representation of the properties field.
   134  	Proto *structpb.Struct `gae:"properties,legacy"`
   135  
   136  	// ChunkCount indicates how many chunks this Proto is split into.
   137  	ChunkCount int `gae:"chunk_count,noindex"`
   138  }
   139  
   140  // PropertyChunk stores a chunk of serialized and compressed
   141  // BuildOutputProperties.Proto bytes.
   142  // In the future, it may also be used for BuildInputProperties.
   143  type PropertyChunk struct {
   144  	_kind string `gae:"$kind,PropertyChunk"`
   145  	// ID ranges from 1 to N, where N is BuildOutputProperties.ChunkCount.
   146  	ID int `gae:"$id"`
   147  	// The BuildOutputProperties entity that this entity belongs to.
   148  	Parent *datastore.Key `gae:"$parent"`
   149  
   150  	// Bytes is one chunk of the compressed property bytes.
   151  	Bytes []byte `gae:"chunk,noindex"`
   152  }
   153  
   154  // chunkProp splits BuildOutputProperties into chunks and returns them. A nil
   155  // return means no chunking is needed.
   156  // Note: the caller is responsible for putting the chunks into Datastore.
   157  func (bo *BuildOutputProperties) chunkProp(c context.Context) ([]*PropertyChunk, error) {
   158  	if bo == nil || bo.Proto == nil || bo.Build == nil {
   159  		return nil, nil
   160  	}
   161  	propBytes, err := proto.Marshal(bo.Proto)
   162  	if err != nil {
   163  		return nil, errors.Annotate(err, "failed to marshal build output properties").Err()
   164  	}
   165  	if len(propBytes) <= maxPropertySize {
   166  		return nil, nil
   167  	}
   168  
   169  	// compress propBytes
   170  	compressed := make([]byte, 0, len(propBytes)/2) // hope for at least 2x compression
   171  	compressed = compression.ZstdCompress(propBytes, compressed)
   172  
   173  	// Round up the result of the integer division.
   174  	count := (len(compressed) + maxPropertySize - 1) / maxPropertySize
   175  	chunks := make([]*PropertyChunk, count)
   176  	pk := datastore.KeyForObj(c, &BuildOutputProperties{
   177  		Build: datastore.KeyForObj(c, &Build{ID: bo.Build.IntID()}),
   178  	})
   179  	for i := 0; i < count; i++ {
   180  		idxStart := i * maxPropertySize
   181  		idxEnd := idxStart + maxPropertySize
   182  		if idxEnd > len(compressed) {
   183  			idxEnd = len(compressed)
   184  		}
   185  
   186  		chunks[i] = &PropertyChunk{
   187  			ID:     i + 1, // ID starts from 1.
   188  			Parent: pk,
   189  			Bytes:  compressed[idxStart:idxEnd],
   190  		}
   191  	}
   192  	return chunks, nil
   193  }
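
// chunkCountExample is an illustrative sketch (not part of the original file)
// of the ceiling division above: with maxPropertySize at 1,000,000, a
// hypothetical 2,500,000 bytes of compressed properties yields 3 chunks
// (1,000,000 + 1,000,000 + 500,000 bytes), with IDs 1, 2 and 3.
func chunkCountExample(compressedLen int) int {
	return (compressedLen + maxPropertySize - 1) / maxPropertySize
}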
   194  
   195  // Get is a wrapper of `datastore.Get` to properly handle large properties.
   196  func (bo *BuildOutputProperties) Get(c context.Context) error {
   197  	if bo == nil || bo.Build == nil {
   198  		return nil
   199  	}
   200  
   201  	pk := datastore.KeyForObj(c, &BuildOutputProperties{
   202  		Build: datastore.KeyForObj(c, &Build{ID: bo.Build.IntID()}),
   203  	})
   204  	// Preemptively fetch up to 4 chunks to minimize Datastore RPC calls, so that
   205  	// in most cases only one call is needed.
   206  	//
   207  	// BUG(b/258241457) - Setting this to 0 to see if it tamps down spurious
   208  	// Lookup costs in datastore. Should evaluate re-enabling after we turn
   209  	// entity caching back on.
   210  	const preFetchedChunkCnt = 0
   211  	chunks := make([]*PropertyChunk, preFetchedChunkCnt)
   212  	for i := 0; i < preFetchedChunkCnt; i++ {
   213  		chunks[i] = &PropertyChunk{
   214  			ID:     i + 1, // ID starts from 1.
   215  			Parent: pk,
   216  		}
   217  	}
   218  
   219  	if err := datastore.Get(c, bo, chunks); err != nil {
   220  		switch me, ok := err.(errors.MultiError); {
   221  		case !ok:
   222  			return err
   223  		case me[0] != nil:
   224  			return me[0]
   225  		case errors.Filter(me[1], datastore.ErrNoSuchEntity) != nil:
   226  			return errors.Annotate(me[1], "failed to fetch the first %d chunks for BuildOutputProperties", preFetchedChunkCnt).Err()
   227  		}
   228  	}
   229  
   230  	// No chunks.
   231  	if bo.ChunkCount == 0 {
   232  		return nil
   233  	}
   234  
   235  	// Fetch the remaining chunks.
   236  	if bo.ChunkCount-preFetchedChunkCnt > 0 {
   237  		for i := preFetchedChunkCnt + 1; i <= bo.ChunkCount; i++ {
   238  			chunks = append(chunks, &PropertyChunk{
   239  				ID:     i,
   240  				Parent: pk,
   241  			})
   242  		}
   243  
   244  		if err := datastore.Get(c, chunks[preFetchedChunkCnt:]); err != nil {
   245  			return errors.Annotate(err, "failed to fetch the remaining chunks for BuildOutputProperties").Err()
   246  		}
   247  	}
   248  	chunks = chunks[:bo.ChunkCount]
   249  
   250  	// Reassemble the compressed bytes and restore the proto.
   251  	var compressedBytes []byte
   252  	for _, chunk := range chunks {
   253  		compressedBytes = append(compressedBytes, chunk.Bytes...)
   254  	}
   255  	var propBytes []byte
   256  	var err error
   257  	if propBytes, err = compression.ZstdDecompress(compressedBytes, nil); err != nil {
   258  		return errors.Annotate(err, "failed to decompress output properties bytes").Err()
   259  	}
   260  	bo.Proto = &structpb.Struct{}
   261  	if err := proto.Unmarshal(propBytes, bo.Proto); err != nil {
   262  		return errors.Annotate(err, "failed to unmarshal outputProperties' chunks").Err()
   263  	}
   264  	bo.ChunkCount = 0
   265  	return nil
   266  }
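
// getOutputPropertiesExample is an illustrative sketch (not part of the
// original file): callers set only the parent Build key and call Get, which
// transparently reassembles any PropertyChunk entities. The build ID 123 is
// hypothetical.
func getOutputPropertiesExample(ctx context.Context) (*structpb.Struct, error) {
	outProps := &BuildOutputProperties{
		Build: datastore.KeyForObj(ctx, &Build{ID: 123}),
	}
	if err := outProps.Get(ctx); err != nil {
		return nil, err
	}
	return outProps.Proto, nil
}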
   267  
   268  // GetMultiOutputProperties fetches multiple BuildOutputProperties in parallel.
   269  func GetMultiOutputProperties(c context.Context, props ...*BuildOutputProperties) error {
   270  	nWorkers := 8
   271  	if len(props) < nWorkers {
   272  		nWorkers = len(props)
   273  	}
   274  
   275  	err := parallel.WorkPool(nWorkers, func(work chan<- func() error) {
   276  		for _, prop := range props {
   277  			prop := prop
   278  			if prop == nil || prop.Build == nil {
   279  				continue
   280  			}
   281  			work <- func() error {
   282  				return prop.Get(c)
   283  			}
   284  		}
   285  	})
   286  	return err
   287  }
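
// getMultiOutputPropertiesExample is an illustrative sketch (not part of the
// original file): each entity needs only its parent Build key set before the
// parallel fetch. The build IDs are hypothetical.
func getMultiOutputPropertiesExample(ctx context.Context, buildIDs []int64) ([]*BuildOutputProperties, error) {
	props := make([]*BuildOutputProperties, len(buildIDs))
	for i, id := range buildIDs {
		props[i] = &BuildOutputProperties{Build: datastore.KeyForObj(ctx, &Build{ID: id})}
	}
	if err := GetMultiOutputProperties(ctx, props...); err != nil {
		return nil, err
	}
	return props, nil
}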
   288  
   289  // Put is a wrapper of `datastore.Put` to properly handle large properties.
   290  // Calling it in a transaction is suggested, to correctly handle partial
   291  // failures when putting the PropertyChunk and BuildOutputProperties entities.
   292  func (bo *BuildOutputProperties) Put(c context.Context) error {
   293  	if bo == nil || bo.Build == nil {
   294  		return nil
   295  	}
   296  
   297  	chunks, err := bo.chunkProp(c)
   298  	if err != nil {
   299  		return err
   300  	}
   301  
   302  	prop := bo.Proto
   303  	if len(chunks) != 0 {
   304  		bo.Proto = nil
   305  		bo.ChunkCount = len(chunks)
   306  	} else {
   307  		bo.ChunkCount = 0
   308  	}
   309  
   310  	if err := datastore.Put(c, bo, chunks); err != nil {
   311  		return err
   312  	}
   313  	bo.Proto = prop
   314  	return nil
   315  }
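
// putOutputPropertiesExample is an illustrative sketch (not part of the
// original file) of the transactional Put suggested above, so that the
// BuildOutputProperties entity and its PropertyChunk children are written
// atomically. The build ID 123 and the properties value are hypothetical.
func putOutputPropertiesExample(ctx context.Context, props *structpb.Struct) error {
	return datastore.RunInTransaction(ctx, func(ctx context.Context) error {
		outProps := &BuildOutputProperties{
			Build: datastore.KeyForObj(ctx, &Build{ID: 123}),
			Proto: props,
		}
		return outProps.Put(ctx)
	}, nil)
}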
   316  
   317  // BuildStepsMaxBytes is the maximum length of BuildSteps.Bytes. If Bytes
   318  // exceeds this maximum, this package will try to compress it, setting IsZipped
   319  // accordingly, but if this length is still exceeded it's an error to write
   320  // such entities to the datastore. Use FromProto to ensure this maximum is
   321  // respected.
   322  const BuildStepsMaxBytes = 1e6
   323  
   324  // BuildSteps is a representation of a build proto's steps field
   325  // in the datastore.
   326  type BuildSteps struct {
   327  	_kind string `gae:"$kind,BuildSteps"`
   328  	// ID is always 1 because only one such entity exists per Build.
   329  	ID int `gae:"$id,1"`
   330  	// Build is the key for the build this entity belongs to.
   331  	Build *datastore.Key `gae:"$parent"`
   332  	// IsZipped indicates whether or not Bytes is zlib compressed.
   333  	// Use ToProto to ensure this compression is respected.
   334  	IsZipped bool `gae:"step_container_bytes_zipped,noindex"`
   335  	// Bytes is the marshaled pb.Build proto in which only the steps field is set.
   336  	// IsZipped determines whether this value is compressed or not.
   337  	Bytes []byte `gae:"steps,noindex"`
   338  }
   339  
   340  // CancelIncomplete marks any incomplete steps as cancelled, returning whether
   341  // at least one step was cancelled. The caller is responsible for writing the
   342  // entity to the datastore if any steps were cancelled. This entity will not be
   343  // mutated if an error occurs.
   344  func (s *BuildSteps) CancelIncomplete(ctx context.Context, now *timestamppb.Timestamp) (bool, error) {
   345  	stp, err := s.ToProto(ctx)
   346  	if err != nil {
   347  		return false, err
   348  	}
   349  	changed := false
   350  	for _, s := range stp {
   351  		if !protoutil.IsEnded(s.Status) {
   352  			s.EndTime = now
   353  			s.Status = pb.Status_CANCELED
   354  			changed = true
   355  		}
   356  	}
   357  	if changed {
   358  		if err := s.FromProto(stp); err != nil {
   359  			return false, err
   360  		}
   361  	}
   362  	return changed, nil
   363  }
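
// cancelStepsExample is an illustrative sketch (not part of the original
// file): when CancelIncomplete reports a change, the caller is responsible
// for persisting the mutated entity.
func cancelStepsExample(ctx context.Context, steps *BuildSteps) error {
	changed, err := steps.CancelIncomplete(ctx, timestamppb.Now())
	if err != nil || !changed {
		return err
	}
	// Persist only if at least one step was cancelled.
	return datastore.Put(ctx, steps)
}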
   364  
   365  // FromProto overwrites the current []*pb.Step representation of these steps.
   366  // The caller is responsible for writing the entity to the datastore. This
   367  // entity will not be mutated if an error occurs.
   368  func (s *BuildSteps) FromProto(stp []*pb.Step) error {
   369  	b, err := proto.Marshal(&pb.Build{
   370  		Steps: stp,
   371  	})
   372  	if err != nil {
   373  		return errors.Annotate(err, "failed to marshal").Err()
   374  	}
   375  	if len(b) <= BuildStepsMaxBytes {
   376  		s.Bytes = b
   377  		s.IsZipped = false
   378  		return nil
   379  	}
   380  	buf := &bytes.Buffer{}
   381  	w := zlib.NewWriter(buf)
   382  	if _, err := w.Write(b); err != nil {
   383  		return errors.Annotate(err, "error zipping").Err()
   384  	}
   385  	if err := w.Close(); err != nil {
   386  		return errors.Annotate(err, "error closing writer").Err()
   387  	}
   388  	s.Bytes = buf.Bytes()
   389  	s.IsZipped = true
   390  	return nil
   391  }
   392  
   393  // ToProto returns the []*pb.Step representation of these steps.
   394  func (s *BuildSteps) ToProto(ctx context.Context) ([]*pb.Step, error) {
   395  	b := s.Bytes
   396  	if s.IsZipped {
   397  		r, err := zlib.NewReader(bytes.NewReader(s.Bytes))
   398  		if err != nil {
   399  			return nil, errors.Annotate(err, "error creating reader for %q", datastore.KeyForObj(ctx, s)).Err()
   400  		}
   401  		b, err = io.ReadAll(r)
   402  		if err != nil {
   403  			return nil, errors.Annotate(err, "error reading %q", datastore.KeyForObj(ctx, s)).Err()
   404  		}
   405  		if err := r.Close(); err != nil {
   406  			return nil, errors.Annotate(err, "error closing reader for %q", datastore.KeyForObj(ctx, s)).Err()
   407  		}
   408  	}
   409  	p := &pb.Build{}
   410  	if err := proto.Unmarshal(b, p); err != nil {
   411  		return nil, errors.Annotate(err, "error unmarshalling %q", datastore.KeyForObj(ctx, s)).Err()
   412  	}
   413  	return p.Steps, nil
   414  }
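
// stepsRoundTripExample is an illustrative sketch (not part of the original
// file) of the FromProto/ToProto round trip: zlib compression is applied
// transparently by FromProto when the marshaled steps exceed
// BuildStepsMaxBytes, and undone by ToProto.
func stepsRoundTripExample(ctx context.Context, build *datastore.Key, stp []*pb.Step) ([]*pb.Step, error) {
	s := &BuildSteps{Build: build}
	if err := s.FromProto(stp); err != nil {
		return nil, err
	}
	return s.ToProto(ctx)
}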