go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/model/metadata.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package model
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"sort"
    21  	"time"
    22  	"unicode/utf8"
    23  
    24  	"google.golang.org/protobuf/types/known/timestamppb"
    25  
    26  	"go.chromium.org/luci/common/clock"
    27  	"go.chromium.org/luci/common/data/stringset"
    28  	"go.chromium.org/luci/common/errors"
    29  	"go.chromium.org/luci/common/retry/transient"
    30  	"go.chromium.org/luci/gae/service/datastore"
    31  	"go.chromium.org/luci/grpc/grpcutil"
    32  	"go.chromium.org/luci/server/auth"
    33  
    34  	api "go.chromium.org/luci/cipd/api/cipd/v1"
    35  	"go.chromium.org/luci/cipd/common"
    36  )
    37  
// InstanceMetadata represents one instance metadata entry.
//
// It is a key-value pair (along with some additional attributes).
//
// The parent entity is the instance entity. ID is derived from
// the key-value pair, see common.InstanceMetadataFingerprint.
//
// Value and ContentType carry the `noindex` tag option; Key, AttachedBy and
// AttachedTs do not.
type InstanceMetadata struct {
	_kind  string                `gae:"$kind,InstanceMetadata"`
	_extra datastore.PropertyMap `gae:"-,extra"`

	// Identity of the entity: its ID and its parent key.
	Fingerprint string         `gae:"$id"`     // see common.InstanceMetadataFingerprint
	Instance    *datastore.Key `gae:"$parent"` // a key of the corresponding Instance entity

	// The key-value pair itself plus the content type of the value.
	Key         string `gae:"key"`                  // the metadata key
	Value       []byte `gae:"value,noindex"`        // the metadata payload, can be big
	ContentType string `gae:"content_type,noindex"` // a content type (perhaps guessed)

	// Attribution. AttachMetadata fills these in only when it creates a new
	// entity; already existing entities keep their original values.
	AttachedBy string    `gae:"attached_by"` // who added this metadata
	AttachedTs time.Time `gae:"attached_ts"` // when it was added
}
    58  
    59  // Proto returns cipd.InstanceMetadata proto with information from this entity.
    60  //
    61  // Assumes the entity is valid.
    62  func (md *InstanceMetadata) Proto() *api.InstanceMetadata {
    63  	return &api.InstanceMetadata{
    64  		Key:         md.Key,
    65  		Value:       md.Value,
    66  		ContentType: md.ContentType,
    67  		Fingerprint: md.Fingerprint,
    68  		AttachedBy:  md.AttachedBy,
    69  		AttachedTs:  timestamppb.New(md.AttachedTs),
    70  	}
    71  }
    72  
// AttachMetadata transactionally attaches metadata to an instance.
//
// Mutates `md` in place by calculating fingerprints and "guessing" content
// type if necessary. Entries whose fingerprint was already seen earlier in
// `md` are dropped. The filtered list reuses the backing array of `md`, so
// when duplicates are present the elements of the caller's slice get shifted.
//
// Entries that already exist in the datastore are not overwritten (they keep
// their AttachedBy/AttachedTs) and no event log entries are emitted for them.
//
// Assumes inputs are already validated. Launches a transaction inside (and thus
// can't be a part of a transaction itself). Updates 'inst' in-place with the
// most recent instance state.
//
// Returns gRPC-tagged errors:
//
//	NotFound if there's no such instance or package.
//	FailedPrecondition if some processors are still running.
//	Aborted if some processors have failed.
//	Internal on fingerprint collision.
//
// Datastore fetch and store failures are tagged as transient.
func AttachMetadata(ctx context.Context, inst *Instance, md []*api.InstanceMetadata) error {
	// Captured once: all new entities and all emitted events share the same
	// timestamp and identity.
	now := clock.Now(ctx).UTC()
	who := string(auth.CurrentIdentity(ctx))

	// Calculate fingerprints and guess content type before the transaction, it is
	// relatively slow. Throw away duplicate entries.
	seen := stringset.New(len(md))
	// Filter in place: `filtered` shares the backing array with `md`.
	filtered := md[:0]
	for _, m := range md {
		m.Fingerprint = common.InstanceMetadataFingerprint(m.Key, m.Value)
		if seen.Add(m.Fingerprint) {
			// Guess the content type only if the caller didn't provide one.
			if m.ContentType == "" {
				if guessPlainText(m.Value) {
					m.ContentType = "text/plain"
				} else {
					m.ContentType = "application/octet-stream"
				}
			}
			filtered = append(filtered, m)
		}
	}
	md = filtered

	return Txn(ctx, "AttachMetadata", func(ctx context.Context) error {
		if err := CheckInstanceReady(ctx, inst); err != nil {
			return err
		}

		// Prepare to fetch everything from the datastore. ents[i] corresponds
		// to md[i], the code below relies on this.
		instKey := datastore.KeyForObj(ctx, inst)
		ents := make([]*InstanceMetadata, len(md))
		for i, m := range md {
			ents[i] = &InstanceMetadata{
				Fingerprint: m.Fingerprint,
				Instance:    instKey,
			}
		}

		// For all existing entries, double check their key-value pair matches
		// the one we try to attach. If not, we've got a hash collision in the
		// fingerprint. This should be super rare, but it doesn't hurt to check
		// since we fetched the entity already.
		checkExisting := func(ent *InstanceMetadata, msg *api.InstanceMetadata) error {
			if ent.Key != msg.Key {
				return errors.Reason("fingerprint %q matches two metadata keys %q and %q, aborting", ent.Fingerprint, ent.Key, msg.Key).
					Tag(grpcutil.InternalTag).Err()
			}
			if !bytes.Equal(ent.Value, msg.Value) {
				return errors.Reason("fingerprint %q matches metadata key %q with two different values, aborting", ent.Fingerprint, ent.Key).
					Tag(grpcutil.InternalTag).Err()
			}
			return nil
		}

		// Find entries that don't exist yet. We don't want to blindly overwrite
		// existing entries, since we want to preserve their AttachedBy/AttachedTs
		// etc. and skip emitting INSTANCE_METADATA_ATTACHED event log entries.
		missing := make([]*InstanceMetadata, 0, len(ents))
		if err := datastore.Get(ctx, ents); err != nil {
			// A non-MultiError is treated as a failure of the whole fetch.
			merr, ok := err.(errors.MultiError)
			if !ok {
				return errors.Annotate(err, "failed to fetch metadata").Tag(transient.Tag).Err()
			}
			// merr[i] is the outcome of fetching ents[i].
			for i, err := range merr {
				switch err {
				case nil:
					// The entity exists already: verify it, but don't touch it.
					if err := checkExisting(ents[i], md[i]); err != nil {
						return err
					}
				case datastore.ErrNoSuchEntity:
					// Populate the rest of the entity fields from input proto fields.
					ent, msg := ents[i], md[i]
					ent.Key = msg.Key
					ent.Value = msg.Value
					ent.ContentType = msg.ContentType
					ent.AttachedBy = who
					ent.AttachedTs = now
					missing = append(missing, ent)
				default:
					return errors.Annotate(err, "failed to fetch metadata %q", ents[i].Fingerprint).Tag(transient.Tag).Err()
				}
			}
		} else {
			// No error at all => all entries already exist, just check them.
			for i := range ents {
				if err := checkExisting(ents[i], md[i]); err != nil {
					return err
				}
			}
		}

		// Nothing new to attach => nothing to store and nothing to log.
		if len(missing) == 0 {
			return nil
		}

		// Store everything.
		if err := datastore.Put(ctx, missing); err != nil {
			return transient.Tag.Apply(err)
		}
		return flushToEventLog(ctx, missing, api.EventKind_INSTANCE_METADATA_ATTACHED, inst, who, now)
	})
}
   190  
// DetachMetadata detaches a bunch of metadata entries from an instance.
//
// Assumes inputs are already validated. If Fingerprint is populated, uses it
// to identify entries to detach. Otherwise calculates it from Key and Value
// (which must be populated in this case) and stores it in the entry, thus
// mutating `md` elements in place.
//
// Entries whose fingerprint was already seen earlier in `md` are dropped. The
// filtered list reuses the backing array of `md`, so when duplicates are
// present the elements of the caller's slice get shifted.
//
// Entries that do not exist in the datastore are silently skipped. Event log
// entries are emitted only for entries that were actually deleted.
//
// Launches a transaction inside (and thus can't be a part of a transaction
// itself).
//
// Datastore fetch and delete failures are tagged as transient.
func DetachMetadata(ctx context.Context, inst *Instance, md []*api.InstanceMetadata) error {
	// Captured once: all emitted events share the same timestamp and identity.
	now := clock.Now(ctx).UTC()
	who := string(auth.CurrentIdentity(ctx))

	// Calculate fingerprints before the transaction, it is relatively slow. Throw
	// away duplicate entries.
	seen := stringset.New(len(md))
	// Filter in place: `filtered` shares the backing array with `md`.
	filtered := md[:0]
	for _, m := range md {
		if m.Fingerprint == "" {
			m.Fingerprint = common.InstanceMetadataFingerprint(m.Key, m.Value)
		}
		if seen.Add(m.Fingerprint) {
			filtered = append(filtered, m)
		}
	}
	md = filtered

	return Txn(ctx, "DetachMetadata", func(ctx context.Context) error {
		// Prepare to fetch everything from the datastore to figure out what entries
		// actually exist, for the event log.
		instKey := datastore.KeyForObj(ctx, inst)
		ents := make([]*InstanceMetadata, len(md))
		for i, m := range md {
			ents[i] = &InstanceMetadata{
				Fingerprint: m.Fingerprint,
				Instance:    instKey,
			}
		}

		existing := make([]*InstanceMetadata, 0, len(ents))
		if err := datastore.Get(ctx, ents); err != nil {
			// A non-MultiError is treated as a failure of the whole fetch.
			merr, ok := err.(errors.MultiError)
			if !ok {
				return errors.Annotate(err, "failed to fetch metadata").Tag(transient.Tag).Err()
			}
			// merr[i] is the outcome of fetching ents[i].
			for i, err := range merr {
				switch err {
				case nil:
					existing = append(existing, ents[i])
				case datastore.ErrNoSuchEntity:
					// Skip, that's ok.
				default:
					return errors.Annotate(err, "failed to fetch metadata %q", ents[i].Fingerprint).Tag(transient.Tag).Err()
				}
			}
		} else {
			// No error at all => all entries exist.
			existing = ents
		}

		// Nothing to detach => nothing to delete and nothing to log.
		if len(existing) == 0 {
			return nil
		}

		// Delete all entries that exist.
		if err := datastore.Delete(ctx, existing); err != nil {
			return transient.Tag.Apply(err)
		}
		return flushToEventLog(ctx, existing, api.EventKind_INSTANCE_METADATA_DETACHED, inst, who, now)
	})
}
   260  
   261  // ListMetadata lists all instance metadata.
   262  //
   263  // The result is ordered by AttachedTs (the most recent first).
   264  func ListMetadata(ctx context.Context, inst *Instance) ([]*InstanceMetadata, error) {
   265  	// Note: 'Order' here is unnecessary, since we sort in memory later anyhow.
   266  	// But it is here in an anticipation of eventually implementing pagination.
   267  	q := datastore.NewQuery("InstanceMetadata").
   268  		Ancestor(datastore.KeyForObj(ctx, inst)).
   269  		Order("-attached_ts")
   270  
   271  	var out []*InstanceMetadata
   272  	if err := datastore.GetAll(ctx, q, &out); err != nil {
   273  		return nil, errors.Annotate(err, "datastore query failed").Tag(transient.Tag).Err()
   274  	}
   275  	orderByTsAndKey(out)
   276  
   277  	return out, nil
   278  }
   279  
   280  // ListMetadataWithKeys lists instance metadata with any of the given keys.
   281  //
   282  // The result is ordered by AttachedTs (the most recent first).
   283  func ListMetadataWithKeys(ctx context.Context, inst *Instance, keys []string) ([]*InstanceMetadata, error) {
   284  	if len(keys) == 0 {
   285  		panic("must not be empty")
   286  	}
   287  
   288  	qs := make([]*datastore.Query, len(keys))
   289  	for i, key := range keys {
   290  		qs[i] = datastore.NewQuery("InstanceMetadata").
   291  			Ancestor(datastore.KeyForObj(ctx, inst)).
   292  			Eq("key", key).
   293  			Order("-attached_ts")
   294  	}
   295  
   296  	var out []*InstanceMetadata
   297  	err := datastore.RunMulti(ctx, qs, func(md *InstanceMetadata) {
   298  		out = append(out, md)
   299  	})
   300  	if err != nil {
   301  		return nil, errors.Annotate(err, "datastore query failed").Tag(transient.Tag).Err()
   302  	}
   303  	orderByTsAndKey(out)
   304  
   305  	return out, nil
   306  }
   307  
   308  // orderByTsAndKey order entries by (-AttachedTs, Key).
   309  func orderByTsAndKey(md []*InstanceMetadata) {
   310  	sort.Slice(md, func(i, j int) bool {
   311  		l, r := md[i], md[j]
   312  		if l.AttachedTs.Equal(r.AttachedTs) {
   313  			return l.Key < r.Key
   314  		}
   315  		return l.AttachedTs.After(r.AttachedTs)
   316  	})
   317  }
   318  
   319  // flushToEventLog emits a bunch of event log entries with metadata.
   320  func flushToEventLog(ctx context.Context, ents []*InstanceMetadata, kind api.EventKind, inst *Instance, who string, now time.Time) error {
   321  	nowTS := timestamppb.New(now)
   322  	events := Events{}
   323  	for _, ent := range ents {
   324  		// Export only valid UTF-8 values of known text-like content types.
   325  		mdValue := ""
   326  		if IsTextContentType(ent.ContentType) {
   327  			mdValue = string(ent.Value)
   328  			if !utf8.ValidString(mdValue) {
   329  				mdValue = ""
   330  			}
   331  		}
   332  		events.Emit(&api.Event{
   333  			Kind:          kind,
   334  			Package:       inst.Package.StringID(),
   335  			Instance:      inst.InstanceID,
   336  			Who:           who,
   337  			When:          nowTS,
   338  			MdKey:         ent.Key,
   339  			MdValue:       mdValue,
   340  			MdContentType: ent.ContentType,
   341  			MdFingerprint: ent.Fingerprint,
   342  		})
   343  	}
   344  	return events.Flush(ctx)
   345  }
   346  
   347  // guessPlainText returns true for smallish printable ASCII strings.
   348  func guessPlainText(v []byte) bool {
   349  	if len(v) >= 32768 {
   350  		return false
   351  	}
   352  	for _, b := range v {
   353  		// Acceptable non-printable chars.
   354  		if b == '\r' || b == '\n' || b == '\t' {
   355  			continue
   356  		}
   357  		// Everything else should be from a printable ASCII range.
   358  		if b < ' ' || b >= 0x7F {
   359  			return false
   360  		}
   361  	}
   362  	return true
   363  }