// Source: go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/appengine/coordinator/logStream.go

// Copyright 2015 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package coordinator

import (
	"context"
	"fmt"
	"regexp"
	"strings"
	"time"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
	"google.golang.org/protobuf/proto"

	"go.chromium.org/luci/common/errors"
	ds "go.chromium.org/luci/gae/service/datastore"
	"go.chromium.org/luci/logdog/api/logpb"
	"go.chromium.org/luci/logdog/common/types"
)

// CurrentSchemaVersion is the current schema version of the LogStream.
// Changes that are not backward-compatible should update this field so
// migration logic and scripts can translate appropriately.
//
// History:
//
//	1 - Contained _Tags and _C queryable fields
//	2 - Removed _Tags and _C queryable fields and applied noindex to
//	    most fields, since query filtering is now implemented in-memory instead
//	    of via datastore filters.
//	3 - Removed all non-indexed fields which are redundant with content in
//	    Descriptor.
const CurrentSchemaVersion = "3"

// ErrPathNotFound is the canonical error returned when a Log Stream Path is
// not found. It carries the gRPC NotFound status code.
var ErrPathNotFound = status.Error(codes.NotFound, "path not found")

// LogStreamExpiry is the duration after creation that a LogStream
// record should persist for.
After this duration it may be deleted. 53 const LogStreamExpiry = 540 * 24 * time.Hour 54 55 // LogStream is the primary datastore model containing information and state of 56 // an individual log stream. 57 type LogStream struct { 58 // ID is the LogStream ID. It is generated from the stream's Prefix/Name 59 // fields. 60 ID HashID `gae:"$id"` 61 62 // Schema is the datastore schema version for this object. This can be used 63 // to facilitate schema migrations. 64 // 65 // The current schema is currentSchemaVersion. 66 Schema string // index needed for batch conversions 67 68 // Prefix is this log stream's prefix value. Log streams with the same prefix 69 // are logically grouped. 70 // 71 // This value should not be changed once populated, as it will invalidate the 72 // ID. 73 Prefix string // index needed for Query RPC 74 // Name is the unique name of this log stream within the Prefix scope. 75 // 76 // This value should not be changed once populated, as it will invalidate the 77 // ID. 78 Name string `gae:",noindex"` 79 80 // Created is the time when this stream was created. 81 Created time.Time `gae:",noindex"` 82 // ExpireAt is time after which the datastore entry for the stream will be deleted. 83 ExpireAt time.Time `gae:",noindex"` 84 85 // Purged, if true, indicates that this log stream has been marked as purged. 86 // Non-administrative queries and requests for this stream will operate as 87 // if this entry doesn't exist. 88 Purged bool `gae:",noindex"` 89 // PurgedTime is the time when this stream was purged. 90 PurgedTime time.Time `gae:",noindex"` 91 92 // ProtoVersion is the version string of the protobuf, as reported by the 93 // Collector (and ultimately self-identified by the Butler). 94 ProtoVersion string `gae:",noindex"` 95 // Descriptor is the binary protobuf data LogStreamDescriptor. 96 Descriptor []byte `gae:",noindex"` 97 98 // extra causes datastore to ignore unrecognized fields and strip them in 99 // future writes. 
100 extra ds.PropertyMap `gae:"-,extra"` 101 102 // noDSValidate is a testing parameter to instruct the LogStream not to 103 // validate before reading/writing to datastore. It can be controlled by 104 // calling SetDSValidate(). 105 noDSValidate bool 106 } 107 108 var _ interface { 109 ds.PropertyLoadSaver 110 } = (*LogStream)(nil) 111 112 // LogStreamID returns the HashID for a given log stream path. 113 func LogStreamID(path types.StreamPath) HashID { 114 return makeHashID(string(path)) 115 } 116 117 // PopulateState populates the datastore key fields for the supplied 118 // LogStreamState, binding them to the current LogStream. 119 func (s *LogStream) PopulateState(c context.Context, lst *LogStreamState) { 120 lst.Parent = ds.KeyForObj(c, s) 121 } 122 123 // State returns the LogStreamState keyed for this LogStream. 124 func (s *LogStream) State(c context.Context) *LogStreamState { 125 var lst LogStreamState 126 s.PopulateState(c, &lst) 127 return &lst 128 } 129 130 // Path returns the LogDog path for this log stream. 131 func (s *LogStream) Path() types.StreamPath { 132 return types.StreamName(s.Prefix).Join(types.StreamName(s.Name)) 133 } 134 135 // Load implements ds.PropertyLoadSaver. 136 func (s *LogStream) Load(pmap ds.PropertyMap) error { 137 // Drop old _C and _Tags fields to save memory. 138 // * _C is is derived entirely from Prefix and Name 139 // * _Tags is derived entirely from Descriptor 140 // * Tags is derived entirely from Descriptor (and briefly appeared in 141 // schema version 2) 142 delete(pmap, "_C") 143 delete(pmap, "_Tags") 144 delete(pmap, "Tags") 145 146 if err := ds.GetPLS(s).Load(pmap); err != nil { 147 return err 148 } 149 150 // Validate the log stream. Don't enforce ID correctness, since 151 // datastore hasn't populated that field yet. 152 if !s.noDSValidate { 153 if err := s.validateImpl(false); err != nil { 154 return err 155 } 156 } 157 return nil 158 } 159 160 // Save implements ds.PropertyLoadSaver. 
161 func (s *LogStream) Save(withMeta bool) (ds.PropertyMap, error) { 162 if !s.noDSValidate { 163 if err := s.validateImpl(true); err != nil { 164 return nil, err 165 } 166 } 167 s.Schema = CurrentSchemaVersion 168 169 return ds.GetPLS(s).Save(withMeta) 170 } 171 172 // Validate evaluates the state and data contents of the LogStream and returns 173 // an error if it is invalid. 174 func (s *LogStream) Validate() error { 175 return s.validateImpl(true) 176 } 177 178 func (s *LogStream) validateImpl(enforceHashID bool) error { 179 if enforceHashID { 180 // Make sure our Prefix and Name match the Hash ID. 181 if hid := LogStreamID(s.Path()); hid != s.ID { 182 return fmt.Errorf("hash IDs don't match (%q != %q)", hid, s.ID) 183 } 184 } 185 186 if err := types.StreamName(s.Prefix).Validate(); err != nil { 187 return fmt.Errorf("invalid prefix: %s", err) 188 } 189 if err := types.StreamName(s.Name).Validate(); err != nil { 190 return fmt.Errorf("invalid name: %s", err) 191 } 192 if s.Created.IsZero() { 193 return errors.New("created time is not set") 194 } 195 196 // Ensure that our Descriptor can be unmarshalled. 197 if _, err := s.DescriptorProto(); err != nil { 198 return fmt.Errorf("could not unmarshal descriptor: %v", err) 199 } 200 return nil 201 } 202 203 // LoadDescriptor loads the fields in the log stream descriptor into this 204 // LogStream entry. These fields are: 205 // - Prefix 206 // - Name 207 // - Descriptor 208 func (s *LogStream) LoadDescriptor(desc *logpb.LogStreamDescriptor) error { 209 if err := desc.Validate(true); err != nil { 210 return fmt.Errorf("invalid descriptor: %v", err) 211 } 212 213 pb, err := proto.Marshal(desc) 214 if err != nil { 215 return fmt.Errorf("failed to marshal descriptor: %v", err) 216 } 217 218 s.Prefix = desc.Prefix 219 s.Name = desc.Name 220 s.Descriptor = pb 221 222 return nil 223 } 224 225 // DescriptorProto unmarshals a LogStreamDescriptor from the stream's Descriptor 226 // field. 
It will return an error if the unmarshalling fails. 227 func (s *LogStream) DescriptorProto() (*logpb.LogStreamDescriptor, error) { 228 desc := logpb.LogStreamDescriptor{} 229 if err := proto.Unmarshal(s.Descriptor, &desc); err != nil { 230 return nil, err 231 } 232 return &desc, nil 233 } 234 235 // SetDSValidate controls whether this LogStream is validated prior to being 236 // read from or written to datastore. 237 // 238 // This is a testing parameter, and should NOT be used in production code. 239 func (s *LogStream) SetDSValidate(v bool) { 240 s.noDSValidate = !v 241 } 242 243 // LogStreamQuery is a function returning `true` if the provided LogStream 244 // matches. 245 type LogStreamQuery struct { 246 Prefix types.StreamName // the prefix being queried 247 248 q *ds.Query 249 includePurged bool 250 checks []func(*LogStream) bool 251 descChecks []func(*logpb.LogStreamDescriptor) bool 252 } 253 254 // NewLogStreamQuery returns a new LogStreamQuery constrained to the prefix of 255 // `pathGlob`, and with a filter function for the stream name in `pathGlob`. 256 // 257 // By default, it will exclude purged logs. 258 // 259 // pathGlob must have a prefix without wildcards, and a stream name portion 260 // which can include `*` or `**` in any combination. 261 // 262 // Returns an error if the supplied pathGlob string describes an invalid query. 
263 func NewLogStreamQuery(pathGlob string) (*LogStreamQuery, error) { 264 prefix, name := types.StreamPath(pathGlob).Split() 265 266 if prefix == "" { 267 return nil, errors.New("prefix invalid: empty") 268 } 269 if strings.ContainsRune(string(prefix), '*') { 270 return nil, errors.New("prefix invalid: contains wildcard `*`") 271 } 272 if err := prefix.Validate(); err != nil { 273 return nil, errors.Annotate(err, "prefix invalid").Err() 274 } 275 276 if name == "" { 277 name = "**" 278 } 279 if err := types.StreamName(strings.Replace(string(name), "*", "a", -1)).Validate(); err != nil { 280 return nil, errors.Annotate(err, "name invalid").Err() 281 } 282 283 ret := &LogStreamQuery{ 284 Prefix: prefix, 285 q: ds.NewQuery("LogStream").Eq("Prefix", string(prefix)), 286 } 287 288 // Escape all regexp metachars. This will have the effect of escaping * as 289 // well. We can then replace sequences of escaped *'s to get the expression we 290 // want. 291 nameEscaped := regexp.QuoteMeta(string(name)) 292 exp := strings.NewReplacer( 293 "/\\*\\*/", "(.*)/", 294 "/\\*\\*", "(.*)", 295 "\\*\\*/", "(.*)", 296 "\\*\\*", "(.*)", 297 "\\*", "([^/][^/]*)", 298 ).Replace(nameEscaped) 299 300 re, err := regexp.Compile(fmt.Sprintf("^%s$", exp)) 301 if err != nil { 302 return nil, errors.Annotate(err, "compiling name regex").Err() 303 } 304 305 // this function implements the check for purged as well as the name 306 // assertion. 307 ret.checks = append(ret.checks, func(ls *LogStream) bool { 308 if !ret.includePurged && ls.Purged { 309 return false 310 } 311 return re.MatchString(ls.Name) 312 }) 313 314 return ret, nil 315 } 316 317 // SetCursor causes the LogStreamQuery to start from the given encoded cursor. 
318 func (lsp *LogStreamQuery) SetCursor(ctx context.Context, cursor string) error { 319 if cursor == "" { 320 return nil 321 } 322 323 cursorObj, err := ds.DecodeCursor(ctx, cursor) 324 if err != nil { 325 return err 326 } 327 328 lsp.q = lsp.q.Start(cursorObj) 329 return nil 330 } 331 332 // OnlyContentType constrains the LogStreamQuery to only return LogStreams of 333 // the given content type. 334 func (lsp *LogStreamQuery) OnlyContentType(ctype string) { 335 if ctype == "" { 336 return 337 } 338 lsp.descChecks = append(lsp.descChecks, func(desc *logpb.LogStreamDescriptor) bool { 339 return desc.ContentType == ctype 340 }) 341 } 342 343 // OnlyStreamType constrains the LogStreamQuery to only return LogStreams of 344 // the given stream type. 345 func (lsp *LogStreamQuery) OnlyStreamType(stype logpb.StreamType) error { 346 if _, ok := logpb.StreamType_name[int32(stype)]; !ok { 347 return errors.New("unknown StreamType") 348 } 349 lsp.descChecks = append(lsp.descChecks, func(desc *logpb.LogStreamDescriptor) bool { 350 return desc.StreamType == stype 351 }) 352 return nil 353 } 354 355 // IncludePurged will have the LogStreamQuery return purged logs as well. 356 func (lsp *LogStreamQuery) IncludePurged() { 357 lsp.includePurged = true 358 } 359 360 // OnlyPurged will have the LogStreamQuery return ONLY purged logs. 361 // 362 // Will result in NO logs if IncludePurged hasn't been set. 363 func (lsp *LogStreamQuery) OnlyPurged() { 364 lsp.checks = append(lsp.checks, func(ls *LogStream) bool { 365 return ls.Purged 366 }) 367 } 368 369 // MustHaveTags constrains LogStreams returned to have all of the given tags. 
370 func (lsp *LogStreamQuery) MustHaveTags(tags map[string]string) { 371 lsp.descChecks = append(lsp.descChecks, func(desc *logpb.LogStreamDescriptor) bool { 372 for k, v := range tags { 373 actual, ok := desc.Tags[k] 374 if !ok { 375 return false 376 } 377 if v != "" && v != actual { 378 return false 379 } 380 } 381 return true 382 }) 383 } 384 385 func (lsp *LogStreamQuery) filter(ls *LogStream) bool { 386 for _, checkFn := range lsp.checks { 387 if !checkFn(ls) { 388 return false 389 } 390 } 391 if len(lsp.descChecks) > 0 { 392 desc, err := ls.DescriptorProto() 393 if err != nil { 394 return false 395 } 396 397 for _, checkFn := range lsp.descChecks { 398 if !checkFn(desc) { 399 return false 400 } 401 } 402 } 403 return true 404 } 405 406 // Run executes the LogStreamQuery and calls `cb` with each LogStream which 407 // matches the LogStreamQuery. 408 // 409 // If `cb` returns ds.Stop, the query will stop with a nil error. 410 // If `cb` returns a different error, the query will stop with the returned 411 // error. 412 // If `cb` returns nil, the query continues until it exhausts. 413 func (lsp *LogStreamQuery) Run(ctx context.Context, cb func(*LogStream, ds.CursorCB) error) error { 414 return ds.Run(ctx, lsp.q, func(ls *LogStream, getCursor ds.CursorCB) (err error) { 415 if lsp.filter(ls) { 416 err = cb(ls, getCursor) 417 } 418 return 419 }) 420 }