go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/logdog/appengine/coordinator/logStream.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package coordinator
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"regexp"
    21  	"strings"
    22  	"time"
    23  
    24  	"google.golang.org/grpc/codes"
    25  	"google.golang.org/grpc/status"
    26  	"google.golang.org/protobuf/proto"
    27  
    28  	"go.chromium.org/luci/common/errors"
    29  	ds "go.chromium.org/luci/gae/service/datastore"
    30  	"go.chromium.org/luci/logdog/api/logpb"
    31  	"go.chromium.org/luci/logdog/common/types"
    32  )
    33  
// CurrentSchemaVersion is the current schema version of the LogStream.
// Changes that are not backward-compatible should update this constant so
// migration logic and scripts can translate appropriately.
//
// LogStream.Save stamps this value into the entity's Schema field on every
// write.
//
// History:
//
//	1 - Contained _Tags and _C queryable fields
//	2 - Removed _Tags and _C queryable fields and applied noindex to
//	    most fields, since query filtering is now implemented in-memory instead
//	    of via datastore filters.
//	3 - Removed all non-indexed fields which are redundant with content in
//	    Descriptor.
const CurrentSchemaVersion = "3"

// ErrPathNotFound is the canonical error returned when a Log Stream Path is
// not found. It is a gRPC status error carrying the NotFound code.
var ErrPathNotFound = status.Error(codes.NotFound, "path not found")

// LogStreamExpiry is the duration after creation that a LogStream
// record should persist for (540 days). After this duration it may be deleted.
const LogStreamExpiry = 540 * 24 * time.Hour
    54  
// LogStream is the primary datastore model containing information and state of
// an individual log stream.
//
// It implements ds.PropertyLoadSaver: Load and Save validate the entity
// (unless validation has been disabled via SetDSValidate) around the default
// struct (de)serialization.
type LogStream struct {
	// ID is the LogStream ID. It is generated from the stream's Prefix/Name
	// fields (see LogStreamID).
	ID HashID `gae:"$id"`

	// Schema is the datastore schema version for this object. This can be used
	// to facilitate schema migrations.
	//
	// The current schema is CurrentSchemaVersion; Save overwrites this field
	// with that value on every write.
	Schema string // index needed for batch conversions

	// Prefix is this log stream's prefix value. Log streams with the same prefix
	// are logically grouped.
	//
	// This value should not be changed once populated, as it will invalidate the
	// ID.
	Prefix string // index needed for Query RPC
	// Name is the unique name of this log stream within the Prefix scope.
	//
	// This value should not be changed once populated, as it will invalidate the
	// ID.
	Name string `gae:",noindex"`

	// Created is the time when this stream was created. Validation rejects a
	// zero value.
	Created time.Time `gae:",noindex"`
	// ExpireAt is the time after which the datastore entry for the stream will
	// be deleted.
	ExpireAt time.Time `gae:",noindex"`

	// Purged, if true, indicates that this log stream has been marked as purged.
	// Non-administrative queries and requests for this stream will operate as
	// if this entry doesn't exist.
	Purged bool `gae:",noindex"`
	// PurgedTime is the time when this stream was purged.
	PurgedTime time.Time `gae:",noindex"`

	// ProtoVersion is the version string of the protobuf, as reported by the
	// Collector (and ultimately self-identified by the Butler).
	ProtoVersion string `gae:",noindex"`
	// Descriptor is the binary protobuf data LogStreamDescriptor. Use
	// DescriptorProto to decode it and LoadDescriptor to populate it.
	Descriptor []byte `gae:",noindex"`

	// extra causes datastore to ignore unrecognized fields and strip them in
	// future writes.
	extra ds.PropertyMap `gae:"-,extra"`

	// noDSValidate is a testing parameter to instruct the LogStream not to
	// validate before reading/writing to datastore. It can be controlled by
	// calling SetDSValidate().
	noDSValidate bool
}
   107  
   108  var _ interface {
   109  	ds.PropertyLoadSaver
   110  } = (*LogStream)(nil)
   111  
   112  // LogStreamID returns the HashID for a given log stream path.
   113  func LogStreamID(path types.StreamPath) HashID {
   114  	return makeHashID(string(path))
   115  }
   116  
   117  // PopulateState populates the datastore key fields for the supplied
   118  // LogStreamState, binding them to the current LogStream.
   119  func (s *LogStream) PopulateState(c context.Context, lst *LogStreamState) {
   120  	lst.Parent = ds.KeyForObj(c, s)
   121  }
   122  
   123  // State returns the LogStreamState keyed for this LogStream.
   124  func (s *LogStream) State(c context.Context) *LogStreamState {
   125  	var lst LogStreamState
   126  	s.PopulateState(c, &lst)
   127  	return &lst
   128  }
   129  
   130  // Path returns the LogDog path for this log stream.
   131  func (s *LogStream) Path() types.StreamPath {
   132  	return types.StreamName(s.Prefix).Join(types.StreamName(s.Name))
   133  }
   134  
   135  // Load implements ds.PropertyLoadSaver.
   136  func (s *LogStream) Load(pmap ds.PropertyMap) error {
   137  	// Drop old _C and _Tags fields to save memory.
   138  	//   * _C is is derived entirely from Prefix and Name
   139  	//   * _Tags is derived entirely from Descriptor
   140  	//   * Tags is derived entirely from Descriptor (and briefly appeared in
   141  	//     schema version 2)
   142  	delete(pmap, "_C")
   143  	delete(pmap, "_Tags")
   144  	delete(pmap, "Tags")
   145  
   146  	if err := ds.GetPLS(s).Load(pmap); err != nil {
   147  		return err
   148  	}
   149  
   150  	// Validate the log stream. Don't enforce ID correctness, since
   151  	// datastore hasn't populated that field yet.
   152  	if !s.noDSValidate {
   153  		if err := s.validateImpl(false); err != nil {
   154  			return err
   155  		}
   156  	}
   157  	return nil
   158  }
   159  
   160  // Save implements ds.PropertyLoadSaver.
   161  func (s *LogStream) Save(withMeta bool) (ds.PropertyMap, error) {
   162  	if !s.noDSValidate {
   163  		if err := s.validateImpl(true); err != nil {
   164  			return nil, err
   165  		}
   166  	}
   167  	s.Schema = CurrentSchemaVersion
   168  
   169  	return ds.GetPLS(s).Save(withMeta)
   170  }
   171  
   172  // Validate evaluates the state and data contents of the LogStream and returns
   173  // an error if it is invalid.
   174  func (s *LogStream) Validate() error {
   175  	return s.validateImpl(true)
   176  }
   177  
   178  func (s *LogStream) validateImpl(enforceHashID bool) error {
   179  	if enforceHashID {
   180  		// Make sure our Prefix and Name match the Hash ID.
   181  		if hid := LogStreamID(s.Path()); hid != s.ID {
   182  			return fmt.Errorf("hash IDs don't match (%q != %q)", hid, s.ID)
   183  		}
   184  	}
   185  
   186  	if err := types.StreamName(s.Prefix).Validate(); err != nil {
   187  		return fmt.Errorf("invalid prefix: %s", err)
   188  	}
   189  	if err := types.StreamName(s.Name).Validate(); err != nil {
   190  		return fmt.Errorf("invalid name: %s", err)
   191  	}
   192  	if s.Created.IsZero() {
   193  		return errors.New("created time is not set")
   194  	}
   195  
   196  	// Ensure that our Descriptor can be unmarshalled.
   197  	if _, err := s.DescriptorProto(); err != nil {
   198  		return fmt.Errorf("could not unmarshal descriptor: %v", err)
   199  	}
   200  	return nil
   201  }
   202  
   203  // LoadDescriptor loads the fields in the log stream descriptor into this
   204  // LogStream entry. These fields are:
   205  //   - Prefix
   206  //   - Name
   207  //   - Descriptor
   208  func (s *LogStream) LoadDescriptor(desc *logpb.LogStreamDescriptor) error {
   209  	if err := desc.Validate(true); err != nil {
   210  		return fmt.Errorf("invalid descriptor: %v", err)
   211  	}
   212  
   213  	pb, err := proto.Marshal(desc)
   214  	if err != nil {
   215  		return fmt.Errorf("failed to marshal descriptor: %v", err)
   216  	}
   217  
   218  	s.Prefix = desc.Prefix
   219  	s.Name = desc.Name
   220  	s.Descriptor = pb
   221  
   222  	return nil
   223  }
   224  
   225  // DescriptorProto unmarshals a LogStreamDescriptor from the stream's Descriptor
   226  // field. It will return an error if the unmarshalling fails.
   227  func (s *LogStream) DescriptorProto() (*logpb.LogStreamDescriptor, error) {
   228  	desc := logpb.LogStreamDescriptor{}
   229  	if err := proto.Unmarshal(s.Descriptor, &desc); err != nil {
   230  		return nil, err
   231  	}
   232  	return &desc, nil
   233  }
   234  
   235  // SetDSValidate controls whether this LogStream is validated prior to being
   236  // read from or written to datastore.
   237  //
   238  // This is a testing parameter, and should NOT be used in production code.
   239  func (s *LogStream) SetDSValidate(v bool) {
   240  	s.noDSValidate = !v
   241  }
   242  
// LogStreamQuery describes a query for LogStream entities sharing a single
// prefix. It combines a datastore query with a set of in-memory predicates;
// a LogStream is yielded by Run only if every predicate returns `true` for it.
//
// Construct one with NewLogStreamQuery.
type LogStreamQuery struct {
	Prefix types.StreamName // the prefix being queried

	// q is the underlying datastore query.
	q *ds.Query
	// includePurged, when false, causes purged streams to be filtered out.
	includePurged bool
	// checks are predicates evaluated against the LogStream entity itself.
	checks []func(*LogStream) bool
	// descChecks are predicates evaluated against the stream's decoded
	// descriptor; the descriptor is only decoded when this slice is non-empty.
	descChecks []func(*logpb.LogStreamDescriptor) bool
}
   253  
   254  // NewLogStreamQuery returns a new LogStreamQuery constrained to the prefix of
   255  // `pathGlob`, and with a filter function for the stream name in `pathGlob`.
   256  //
   257  // By default, it will exclude purged logs.
   258  //
   259  // pathGlob must have a prefix without wildcards, and a stream name portion
   260  // which can include `*` or `**` in any combination.
   261  //
   262  // Returns an error if the supplied pathGlob string describes an invalid query.
   263  func NewLogStreamQuery(pathGlob string) (*LogStreamQuery, error) {
   264  	prefix, name := types.StreamPath(pathGlob).Split()
   265  
   266  	if prefix == "" {
   267  		return nil, errors.New("prefix invalid: empty")
   268  	}
   269  	if strings.ContainsRune(string(prefix), '*') {
   270  		return nil, errors.New("prefix invalid: contains wildcard `*`")
   271  	}
   272  	if err := prefix.Validate(); err != nil {
   273  		return nil, errors.Annotate(err, "prefix invalid").Err()
   274  	}
   275  
   276  	if name == "" {
   277  		name = "**"
   278  	}
   279  	if err := types.StreamName(strings.Replace(string(name), "*", "a", -1)).Validate(); err != nil {
   280  		return nil, errors.Annotate(err, "name invalid").Err()
   281  	}
   282  
   283  	ret := &LogStreamQuery{
   284  		Prefix: prefix,
   285  		q:      ds.NewQuery("LogStream").Eq("Prefix", string(prefix)),
   286  	}
   287  
   288  	// Escape all regexp metachars. This will have the effect of escaping * as
   289  	// well. We can then replace sequences of escaped *'s to get the expression we
   290  	// want.
   291  	nameEscaped := regexp.QuoteMeta(string(name))
   292  	exp := strings.NewReplacer(
   293  		"/\\*\\*/", "(.*)/",
   294  		"/\\*\\*", "(.*)",
   295  		"\\*\\*/", "(.*)",
   296  		"\\*\\*", "(.*)",
   297  		"\\*", "([^/][^/]*)",
   298  	).Replace(nameEscaped)
   299  
   300  	re, err := regexp.Compile(fmt.Sprintf("^%s$", exp))
   301  	if err != nil {
   302  		return nil, errors.Annotate(err, "compiling name regex").Err()
   303  	}
   304  
   305  	// this function implements the check for purged as well as the name
   306  	// assertion.
   307  	ret.checks = append(ret.checks, func(ls *LogStream) bool {
   308  		if !ret.includePurged && ls.Purged {
   309  			return false
   310  		}
   311  		return re.MatchString(ls.Name)
   312  	})
   313  
   314  	return ret, nil
   315  }
   316  
   317  // SetCursor causes the LogStreamQuery to start from the given encoded cursor.
   318  func (lsp *LogStreamQuery) SetCursor(ctx context.Context, cursor string) error {
   319  	if cursor == "" {
   320  		return nil
   321  	}
   322  
   323  	cursorObj, err := ds.DecodeCursor(ctx, cursor)
   324  	if err != nil {
   325  		return err
   326  	}
   327  
   328  	lsp.q = lsp.q.Start(cursorObj)
   329  	return nil
   330  }
   331  
   332  // OnlyContentType constrains the LogStreamQuery to only return LogStreams of
   333  // the given content type.
   334  func (lsp *LogStreamQuery) OnlyContentType(ctype string) {
   335  	if ctype == "" {
   336  		return
   337  	}
   338  	lsp.descChecks = append(lsp.descChecks, func(desc *logpb.LogStreamDescriptor) bool {
   339  		return desc.ContentType == ctype
   340  	})
   341  }
   342  
   343  // OnlyStreamType constrains the LogStreamQuery to only return LogStreams of
   344  // the given stream type.
   345  func (lsp *LogStreamQuery) OnlyStreamType(stype logpb.StreamType) error {
   346  	if _, ok := logpb.StreamType_name[int32(stype)]; !ok {
   347  		return errors.New("unknown StreamType")
   348  	}
   349  	lsp.descChecks = append(lsp.descChecks, func(desc *logpb.LogStreamDescriptor) bool {
   350  		return desc.StreamType == stype
   351  	})
   352  	return nil
   353  }
   354  
   355  // IncludePurged will have the LogStreamQuery return purged logs as well.
   356  func (lsp *LogStreamQuery) IncludePurged() {
   357  	lsp.includePurged = true
   358  }
   359  
   360  // OnlyPurged will have the LogStreamQuery return ONLY purged logs.
   361  //
   362  // Will result in NO logs if IncludePurged hasn't been set.
   363  func (lsp *LogStreamQuery) OnlyPurged() {
   364  	lsp.checks = append(lsp.checks, func(ls *LogStream) bool {
   365  		return ls.Purged
   366  	})
   367  }
   368  
   369  // MustHaveTags constrains LogStreams returned to have all of the given tags.
   370  func (lsp *LogStreamQuery) MustHaveTags(tags map[string]string) {
   371  	lsp.descChecks = append(lsp.descChecks, func(desc *logpb.LogStreamDescriptor) bool {
   372  		for k, v := range tags {
   373  			actual, ok := desc.Tags[k]
   374  			if !ok {
   375  				return false
   376  			}
   377  			if v != "" && v != actual {
   378  				return false
   379  			}
   380  		}
   381  		return true
   382  	})
   383  }
   384  
   385  func (lsp *LogStreamQuery) filter(ls *LogStream) bool {
   386  	for _, checkFn := range lsp.checks {
   387  		if !checkFn(ls) {
   388  			return false
   389  		}
   390  	}
   391  	if len(lsp.descChecks) > 0 {
   392  		desc, err := ls.DescriptorProto()
   393  		if err != nil {
   394  			return false
   395  		}
   396  
   397  		for _, checkFn := range lsp.descChecks {
   398  			if !checkFn(desc) {
   399  				return false
   400  			}
   401  		}
   402  	}
   403  	return true
   404  }
   405  
   406  // Run executes the LogStreamQuery and calls `cb` with each LogStream which
   407  // matches the LogStreamQuery.
   408  //
   409  // If `cb` returns ds.Stop, the query will stop with a nil error.
   410  // If `cb` returns a different error, the query will stop with the returned
   411  // error.
   412  // If `cb` returns nil, the query continues until it exhausts.
   413  func (lsp *LogStreamQuery) Run(ctx context.Context, cb func(*LogStream, ds.CursorCB) error) error {
   414  	return ds.Run(ctx, lsp.q, func(ls *LogStream, getCursor ds.CursorCB) (err error) {
   415  		if lsp.filter(ls) {
   416  			err = cb(ls, getCursor)
   417  		}
   418  		return
   419  	})
   420  }