go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/model/metadata.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package model 16 17 import ( 18 "bytes" 19 "context" 20 "sort" 21 "time" 22 "unicode/utf8" 23 24 "google.golang.org/protobuf/types/known/timestamppb" 25 26 "go.chromium.org/luci/common/clock" 27 "go.chromium.org/luci/common/data/stringset" 28 "go.chromium.org/luci/common/errors" 29 "go.chromium.org/luci/common/retry/transient" 30 "go.chromium.org/luci/gae/service/datastore" 31 "go.chromium.org/luci/grpc/grpcutil" 32 "go.chromium.org/luci/server/auth" 33 34 api "go.chromium.org/luci/cipd/api/cipd/v1" 35 "go.chromium.org/luci/cipd/common" 36 ) 37 38 // InstanceMetadata represents one instance metadata entry. 39 // 40 // It is a key-value pair (along with some additional attributes). 41 // 42 // The parent entity is the instance entity. ID is derived from 43 // the key-value pair, see common.InstanceMetadataFingerprint. 44 type InstanceMetadata struct { 45 _kind string `gae:"$kind,InstanceMetadata"` 46 _extra datastore.PropertyMap `gae:"-,extra"` 47 48 Fingerprint string `gae:"$id"` // see common.InstanceMetadataFingerprint 49 Instance *datastore.Key `gae:"$parent"` // a key of the corresponding Instance entity 50 51 Key string `gae:"key"` // the metadata key 52 Value []byte `gae:"value,noindex"` // the metadata payload, can be big 53 ContentType string `gae:"content_type,noindex"` // a content type (perhaps guessed) 54 55 AttachedBy string `gae:"attached_by"` // who added this metadata 56 AttachedTs time.Time `gae:"attached_ts"` // when it was added 57 } 58 59 // Proto returns cipd.InstanceMetadata proto with information from this entity. 60 // 61 // Assumes the entity is valid. 62 func (md *InstanceMetadata) Proto() *api.InstanceMetadata { 63 return &api.InstanceMetadata{ 64 Key: md.Key, 65 Value: md.Value, 66 ContentType: md.ContentType, 67 Fingerprint: md.Fingerprint, 68 AttachedBy: md.AttachedBy, 69 AttachedTs: timestamppb.New(md.AttachedTs), 70 } 71 } 72 73 // AttachMetadata transactionally attaches metadata to an instance. 74 // 75 // Mutates `md` in place by calculating fingerprints and "guessing" content 76 // type if necessary. 77 // 78 // Assumes inputs are already validated. Launches a transaction inside (and thus 79 // can't be a part of a transaction itself). Updates 'inst' in-place with the 80 // most recent instance state. 81 // 82 // Returns gRPC-tagged errors: 83 // 84 // NotFound if there's no such instance or package. 85 // FailedPrecondition if some processors are still running. 86 // Aborted if some processors have failed. 87 // Internal on fingerprint collision. 88 func AttachMetadata(ctx context.Context, inst *Instance, md []*api.InstanceMetadata) error { 89 now := clock.Now(ctx).UTC() 90 who := string(auth.CurrentIdentity(ctx)) 91 92 // Calculate fingerprints and guess content type before the transaction, it is 93 // relatively slow. Throw away duplicate entries. 94 seen := stringset.New(len(md)) 95 filtered := md[:0] 96 for _, m := range md { 97 m.Fingerprint = common.InstanceMetadataFingerprint(m.Key, m.Value) 98 if seen.Add(m.Fingerprint) { 99 if m.ContentType == "" { 100 if guessPlainText(m.Value) { 101 m.ContentType = "text/plain" 102 } else { 103 m.ContentType = "application/octet-stream" 104 } 105 } 106 filtered = append(filtered, m) 107 } 108 } 109 md = filtered 110 111 return Txn(ctx, "AttachMetadata", func(ctx context.Context) error { 112 if err := CheckInstanceReady(ctx, inst); err != nil { 113 return err 114 } 115 116 // Prepare to fetch everything from the datastore. 117 instKey := datastore.KeyForObj(ctx, inst) 118 ents := make([]*InstanceMetadata, len(md)) 119 for i, m := range md { 120 ents[i] = &InstanceMetadata{ 121 Fingerprint: m.Fingerprint, 122 Instance: instKey, 123 } 124 } 125 126 // For all existing entries, double check their key-value pair matches 127 // the one we try to attach. If not, we've got a hash collision in the 128 // fingerprint. This should be super rare, but it doesn't hurt to check 129 // since we fetched the entity already. 130 checkExisting := func(ent *InstanceMetadata, msg *api.InstanceMetadata) error { 131 if ent.Key != msg.Key { 132 return errors.Reason("fingerprint %q matches two metadata keys %q and %q, aborting", ent.Fingerprint, ent.Key, msg.Key). 133 Tag(grpcutil.InternalTag).Err() 134 } 135 if !bytes.Equal(ent.Value, msg.Value) { 136 return errors.Reason("fingerprint %q matches metadata key %q with two different values, aborting", ent.Fingerprint, ent.Key). 137 Tag(grpcutil.InternalTag).Err() 138 } 139 return nil 140 } 141 142 // Find entries that don't exist yet. We don't want to blindly overwrite 143 // existing entries, since we want to preserve their AttachedBy/AttachedTs 144 // etc. and skip emitting INSTANCE_METADATA_ATTACHED event log entries. 145 missing := make([]*InstanceMetadata, 0, len(ents)) 146 if err := datastore.Get(ctx, ents); err != nil { 147 merr, ok := err.(errors.MultiError) 148 if !ok { 149 return errors.Annotate(err, "failed to fetch metadata").Tag(transient.Tag).Err() 150 } 151 for i, err := range merr { 152 switch err { 153 case nil: 154 if err := checkExisting(ents[i], md[i]); err != nil { 155 return err 156 } 157 case datastore.ErrNoSuchEntity: 158 // Populate the rest of the entity fields from input proto fields. 159 ent, msg := ents[i], md[i] 160 ent.Key = msg.Key 161 ent.Value = msg.Value 162 ent.ContentType = msg.ContentType 163 ent.AttachedBy = who 164 ent.AttachedTs = now 165 missing = append(missing, ent) 166 default: 167 return errors.Annotate(err, "failed to fetch metadata %q", ents[i].Fingerprint).Tag(transient.Tag).Err() 168 } 169 } 170 } else { 171 // No error at all => all entries already exist, just check them. 172 for i := range ents { 173 if err := checkExisting(ents[i], md[i]); err != nil { 174 return err 175 } 176 } 177 } 178 179 if len(missing) == 0 { 180 return nil 181 } 182 183 // Store everything. 184 if err := datastore.Put(ctx, missing); err != nil { 185 return transient.Tag.Apply(err) 186 } 187 return flushToEventLog(ctx, missing, api.EventKind_INSTANCE_METADATA_ATTACHED, inst, who, now) 188 }) 189 } 190 191 // DetachMetadata detaches a bunch of metadata entries from an instance. 192 // 193 // Assumes inputs are already validated. If Fingerprint is populated, uses it 194 // to identifies entries to detach. Otherwise calculates it from Key and Value 195 // (which must be populated in this case). 196 // 197 // Launches a transaction inside (and thus can't be a part of a transaction 198 // itself). 199 func DetachMetadata(ctx context.Context, inst *Instance, md []*api.InstanceMetadata) error { 200 now := clock.Now(ctx).UTC() 201 who := string(auth.CurrentIdentity(ctx)) 202 203 // Calculate fingerprints before the transaction, it is relatively slow. Throw 204 // away duplicate entries. 205 seen := stringset.New(len(md)) 206 filtered := md[:0] 207 for _, m := range md { 208 if m.Fingerprint == "" { 209 m.Fingerprint = common.InstanceMetadataFingerprint(m.Key, m.Value) 210 } 211 if seen.Add(m.Fingerprint) { 212 filtered = append(filtered, m) 213 } 214 } 215 md = filtered 216 217 return Txn(ctx, "DetachMetadata", func(ctx context.Context) error { 218 // Prepare to fetch everything from the datastore to figure out what entries 219 // actually exist, for the event log. 220 instKey := datastore.KeyForObj(ctx, inst) 221 ents := make([]*InstanceMetadata, len(md)) 222 for i, m := range md { 223 ents[i] = &InstanceMetadata{ 224 Fingerprint: m.Fingerprint, 225 Instance: instKey, 226 } 227 } 228 229 existing := make([]*InstanceMetadata, 0, len(ents)) 230 if err := datastore.Get(ctx, ents); err != nil { 231 merr, ok := err.(errors.MultiError) 232 if !ok { 233 return errors.Annotate(err, "failed to fetch metadata").Tag(transient.Tag).Err() 234 } 235 for i, err := range merr { 236 switch err { 237 case nil: 238 existing = append(existing, ents[i]) 239 case datastore.ErrNoSuchEntity: 240 // Skip, that's ok. 241 default: 242 return errors.Annotate(err, "failed to fetch metadata %q", ents[i].Fingerprint).Tag(transient.Tag).Err() 243 } 244 } 245 } else { 246 existing = ents 247 } 248 249 if len(existing) == 0 { 250 return nil 251 } 252 253 // Store everything. 254 if err := datastore.Delete(ctx, existing); err != nil { 255 return transient.Tag.Apply(err) 256 } 257 return flushToEventLog(ctx, existing, api.EventKind_INSTANCE_METADATA_DETACHED, inst, who, now) 258 }) 259 } 260 261 // ListMetadata lists all instance metadata. 262 // 263 // The result is ordered by AttachedTs (the most recent first). 264 func ListMetadata(ctx context.Context, inst *Instance) ([]*InstanceMetadata, error) { 265 // Note: 'Order' here is unnecessary, since we sort in memory later anyhow. 266 // But it is here in an anticipation of eventually implementing pagination. 267 q := datastore.NewQuery("InstanceMetadata"). 268 Ancestor(datastore.KeyForObj(ctx, inst)). 269 Order("-attached_ts") 270 271 var out []*InstanceMetadata 272 if err := datastore.GetAll(ctx, q, &out); err != nil { 273 return nil, errors.Annotate(err, "datastore query failed").Tag(transient.Tag).Err() 274 } 275 orderByTsAndKey(out) 276 277 return out, nil 278 } 279 280 // ListMetadataWithKeys lists instance metadata with any of the given keys. 281 // 282 // The result is ordered by AttachedTs (the most recent first). 283 func ListMetadataWithKeys(ctx context.Context, inst *Instance, keys []string) ([]*InstanceMetadata, error) { 284 if len(keys) == 0 { 285 panic("must not be empty") 286 } 287 288 qs := make([]*datastore.Query, len(keys)) 289 for i, key := range keys { 290 qs[i] = datastore.NewQuery("InstanceMetadata"). 291 Ancestor(datastore.KeyForObj(ctx, inst)). 292 Eq("key", key). 293 Order("-attached_ts") 294 } 295 296 var out []*InstanceMetadata 297 err := datastore.RunMulti(ctx, qs, func(md *InstanceMetadata) { 298 out = append(out, md) 299 }) 300 if err != nil { 301 return nil, errors.Annotate(err, "datastore query failed").Tag(transient.Tag).Err() 302 } 303 orderByTsAndKey(out) 304 305 return out, nil 306 } 307 308 // orderByTsAndKey order entries by (-AttachedTs, Key). 309 func orderByTsAndKey(md []*InstanceMetadata) { 310 sort.Slice(md, func(i, j int) bool { 311 l, r := md[i], md[j] 312 if l.AttachedTs.Equal(r.AttachedTs) { 313 return l.Key < r.Key 314 } 315 return l.AttachedTs.After(r.AttachedTs) 316 }) 317 } 318 319 // flushToEventLog emits a bunch of event log entries with metadata. 320 func flushToEventLog(ctx context.Context, ents []*InstanceMetadata, kind api.EventKind, inst *Instance, who string, now time.Time) error { 321 nowTS := timestamppb.New(now) 322 events := Events{} 323 for _, ent := range ents { 324 // Export only valid UTF-8 values of known text-like content types. 325 mdValue := "" 326 if IsTextContentType(ent.ContentType) { 327 mdValue = string(ent.Value) 328 if !utf8.ValidString(mdValue) { 329 mdValue = "" 330 } 331 } 332 events.Emit(&api.Event{ 333 Kind: kind, 334 Package: inst.Package.StringID(), 335 Instance: inst.InstanceID, 336 Who: who, 337 When: nowTS, 338 MdKey: ent.Key, 339 MdValue: mdValue, 340 MdContentType: ent.ContentType, 341 MdFingerprint: ent.Fingerprint, 342 }) 343 } 344 return events.Flush(ctx) 345 } 346 347 // guessPlainText returns true for smallish printable ASCII strings. 348 func guessPlainText(v []byte) bool { 349 if len(v) >= 32768 { 350 return false 351 } 352 for _, b := range v { 353 // Acceptable non-printable chars. 354 if b == '\r' || b == '\n' || b == '\t' { 355 continue 356 } 357 // Everything else should be from a printable ASCII range. 358 if b < ' ' || b >= 0x7F { 359 return false 360 } 361 } 362 return true 363 }