github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/metrics/matcher/namespaces.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package matcher
    22  
    23  import (
    24  	"errors"
    25  	"sync"
    26  	"time"
    27  
    28  	"github.com/m3db/m3/src/cluster/kv"
    29  	"github.com/m3db/m3/src/cluster/kv/util/runtime"
    30  	"github.com/m3db/m3/src/metrics/aggregation"
    31  	"github.com/m3db/m3/src/metrics/generated/proto/rulepb"
    32  	"github.com/m3db/m3/src/metrics/matcher/namespace"
    33  	"github.com/m3db/m3/src/metrics/metric"
    34  	"github.com/m3db/m3/src/metrics/metric/id"
    35  	"github.com/m3db/m3/src/metrics/rules"
    36  	"github.com/m3db/m3/src/metrics/rules/view"
    37  	"github.com/m3db/m3/src/x/clock"
    38  	xerrors "github.com/m3db/m3/src/x/errors"
    39  	xos "github.com/m3db/m3/src/x/os"
    40  	"github.com/m3db/m3/src/x/watch"
    41  
    42  	"github.com/uber-go/tally"
    43  	"go.uber.org/zap"
    44  )
    45  
    46  var (
    47  	emptyNamespaces rules.Namespaces
    48  	errNilValue     = errors.New("nil value received")
    49  )
    50  
// Namespaces manages runtime updates to registered namespaces and provides
// an API to match metric IDs against rules in the corresponding namespaces.
type Namespaces interface {
	rules.ActiveSet
	// Open opens the namespaces and starts watching runtime rule updates.
	Open() error

	// Version returns the current version for a given namespace.
	Version(namespace []byte) int

	// Close closes the namespaces.
	Close()
}
    64  
// rulesNamespace is a package-local named type over rules.Namespace. process
// converts to it when storing entries in its incoming map and converts back
// to rules.Namespace when reading them.
type rulesNamespace rules.Namespace
    66  
// namespacesMetrics groups the counters emitted by namespaces.
type namespacesMetrics struct {
	// notExists is incremented by ruleSet when a lookup finds no rule set
	// for the requested namespace.
	notExists         tally.Counter
	// added is incremented when process creates a rule set for a namespace
	// that was not yet in the rules map.
	added             tally.Counter
	// removed is intended to track namespaces dropped from the rules map.
	removed           tally.Counter
	// watched is incremented each time process starts a watch on a rule set.
	watched           tally.Counter
	// watchErrors is incremented when watching a rule set fails in process.
	watchErrors       tally.Counter
	// unwatched is incremented each time process unwatches a rule set.
	unwatched         tally.Counter
	// createWatchErrors is incremented when Open fails with a
	// watch.CreateWatchError.
	createWatchErrors tally.Counter
	// initWatchErrors is incremented when Open fails with any other
	// non-interrupt error.
	initWatchErrors   tally.Counter
}
    77  
    78  func newNamespacesMetrics(scope tally.Scope) namespacesMetrics {
    79  	return namespacesMetrics{
    80  		notExists:         scope.Counter("not-exists"),
    81  		added:             scope.Counter("added"),
    82  		removed:           scope.Counter("removed"),
    83  		watched:           scope.Counter("watched"),
    84  		watchErrors:       scope.Counter("watch-errors"),
    85  		unwatched:         scope.Counter("unwatched"),
    86  		createWatchErrors: scope.Counter("create-watch-errors"),
    87  		initWatchErrors:   scope.Counter("init-watch-errors"),
    88  	}
    89  }
    90  
// namespaces contains the list of namespace users have defined rules for.
type namespaces struct {
	// The embedded lock is held for writing by toNamespaces (around proto) and
	// process (around rules), and for reading by the lookup paths and Close.
	sync.RWMutex
	// The embedded runtime value watches key; NewNamespaces wires it to
	// toNamespaces (unmarshal) and process (apply).
	runtime.Value

	// key is the key passed to NewNamespaces and handed to runtime.NewValue.
	key                  string
	// store is the KV store taken from the options.
	store                kv.Store
	opts                 Options
	// nowFn supplies the current time used by process when deciding whether
	// a tombstoned rule set can be dropped.
	nowFn                clock.NowFn
	log                  *zap.Logger
	// ruleSetKeyFn maps a namespace name to the key of its rule set.
	ruleSetKeyFn         RuleSetKeyFn
	// matchRangePast is subtracted from now in process to compute the
	// earliest time a tombstoned rule set must have cut over by to be dropped.
	matchRangePast       time.Duration
	// onNamespaceAddedFn, if non-nil, is called by process for each namespace
	// that was not previously in the rules map.
	onNamespaceAddedFn   OnNamespaceAddedFn
	// onNamespaceRemovedFn, if non-nil, is called by process right before a
	// rule set is deleted from the rules map.
	onNamespaceRemovedFn OnNamespaceRemovedFn

	// proto is a scratch message that toNamespaces resets and unmarshals into
	// on every update.
	proto                       *rulepb.Namespaces
	// rules maps a namespace to its rule set.
	rules                       *namespaceRuleSetsMap
	metrics                     namespacesMetrics
	// nsResolver resolves the namespace of a metric ID for the match calls.
	nsResolver                  namespace.Resolver
	// requireNamespaceWatchOnInit makes Open return value-initialization
	// errors and makes process return rule set watch errors instead of only
	// logging them.
	requireNamespaceWatchOnInit bool
}
   112  
   113  // NewNamespaces creates a new namespaces object.
   114  func NewNamespaces(key string, opts Options) Namespaces {
   115  	instrumentOpts := opts.InstrumentOptions()
   116  	n := &namespaces{
   117  		key:                         key,
   118  		store:                       opts.KVStore(),
   119  		opts:                        opts,
   120  		nowFn:                       opts.ClockOptions().NowFn(),
   121  		log:                         instrumentOpts.Logger(),
   122  		ruleSetKeyFn:                opts.RuleSetKeyFn(),
   123  		matchRangePast:              opts.MatchRangePast(),
   124  		onNamespaceAddedFn:          opts.OnNamespaceAddedFn(),
   125  		onNamespaceRemovedFn:        opts.OnNamespaceRemovedFn(),
   126  		proto:                       &rulepb.Namespaces{},
   127  		rules:                       newNamespaceRuleSetsMap(namespaceRuleSetsMapOptions{}),
   128  		metrics:                     newNamespacesMetrics(instrumentOpts.MetricsScope()),
   129  		requireNamespaceWatchOnInit: opts.RequireNamespaceWatchOnInit(),
   130  		nsResolver:                  opts.NamespaceResolver(),
   131  	}
   132  	valueOpts := runtime.NewOptions().
   133  		SetInstrumentOptions(instrumentOpts).
   134  		SetInitWatchTimeout(opts.InitWatchTimeout()).
   135  		SetKVStore(n.store).
   136  		SetUnmarshalFn(n.toNamespaces).
   137  		SetProcessFn(n.process).
   138  		SetInterruptedCh(opts.InterruptedCh())
   139  	n.Value = runtime.NewValue(key, valueOpts)
   140  	return n
   141  }
   142  
   143  func (n *namespaces) Open() error {
   144  	err := n.Watch()
   145  	var interruptErr *xos.InterruptError
   146  	if err == nil {
   147  		return nil
   148  	} else if errors.As(err, &interruptErr) {
   149  		return err
   150  	}
   151  
   152  	errCreateWatch, ok := err.(watch.CreateWatchError)
   153  	if ok {
   154  		n.metrics.createWatchErrors.Inc(1)
   155  		return errCreateWatch
   156  	}
   157  	// NB(xichen): we managed to watch the key but weren't able
   158  	// to initialize the value. In this case, log the error instead
   159  	// to be more resilient to error conditions preventing process
   160  	// from starting up.
   161  	n.metrics.initWatchErrors.Inc(1)
   162  	if n.requireNamespaceWatchOnInit {
   163  		return err
   164  	}
   165  
   166  	n.opts.InstrumentOptions().Logger().With(
   167  		zap.String("key", n.key),
   168  		zap.Error(err),
   169  	).Error("error initializing namespaces values, retrying in the background")
   170  
   171  	return nil
   172  }
   173  
   174  func (n *namespaces) Version(namespace []byte) int {
   175  	n.RLock()
   176  	ruleSet, exists := n.rules.Get(namespace)
   177  	n.RUnlock()
   178  	if !exists {
   179  		return kv.UninitializedVersion
   180  	}
   181  	return ruleSet.Version()
   182  }
   183  
   184  func (n *namespaces) LatestRollupRules(namespace []byte, timeNanos int64) ([]view.RollupRule, error) {
   185  	ruleSet, exists := n.ruleSet(namespace)
   186  	if !exists {
   187  		return nil, errors.New("ruleset not found for namespace")
   188  	}
   189  
   190  	return ruleSet.LatestRollupRules(namespace, timeNanos)
   191  }
   192  
   193  func (n *namespaces) ForwardMatch(id id.ID, fromNanos, toNanos int64,
   194  	opts rules.MatchOptions) (rules.MatchResult, error) {
   195  	namespace := n.nsResolver.Resolve(id)
   196  	ruleSet, exists := n.ruleSet(namespace)
   197  	if !exists {
   198  		return rules.EmptyMatchResult, nil
   199  	}
   200  	return ruleSet.ForwardMatch(id, fromNanos, toNanos, opts)
   201  }
   202  
   203  func (n *namespaces) ReverseMatch(
   204  	id id.ID,
   205  	fromNanos, toNanos int64,
   206  	mt metric.Type,
   207  	at aggregation.Type,
   208  	isMultiAggregationTypesAllowed bool,
   209  	aggTypesOpts aggregation.TypesOptions,
   210  ) (rules.MatchResult, error) {
   211  	namespace := n.nsResolver.Resolve(id)
   212  	ruleSet, exists := n.ruleSet(namespace)
   213  	if !exists {
   214  		return rules.EmptyMatchResult, nil
   215  	}
   216  	return ruleSet.ReverseMatch(id, fromNanos, toNanos, mt, at, isMultiAggregationTypesAllowed, aggTypesOpts)
   217  }
   218  
   219  func (n *namespaces) ruleSet(namespace []byte) (RuleSet, bool) {
   220  	n.RLock()
   221  	ruleSet, exists := n.rules.Get(namespace)
   222  	n.RUnlock()
   223  	if !exists {
   224  		n.metrics.notExists.Inc(1)
   225  	}
   226  	return ruleSet, exists
   227  }
   228  
   229  func (n *namespaces) Close() {
   230  	// NB(xichen): we stop watching the value outside lock because otherwise we might
   231  	// be holding the namespace lock while attempting to acquire the value lock, and
   232  	// the updating goroutine might be holding the value lock and attempting to
   233  	// acquire the namespace lock, causing a deadlock.
   234  	n.Value.Unwatch()
   235  
   236  	n.RLock()
   237  	for _, entry := range n.rules.Iter() {
   238  		rs := entry.Value()
   239  		rs.Unwatch()
   240  	}
   241  	n.RUnlock()
   242  }
   243  
   244  func (n *namespaces) toNamespaces(value kv.Value) (interface{}, error) {
   245  	n.Lock()
   246  	defer n.Unlock()
   247  
   248  	if value == nil {
   249  		return emptyNamespaces, errNilValue
   250  	}
   251  	n.proto.Reset()
   252  	if err := value.Unmarshal(n.proto); err != nil {
   253  		return emptyNamespaces, err
   254  	}
   255  	return rules.NewNamespaces(value.Version(), n.proto)
   256  }
   257  
   258  func (n *namespaces) process(value interface{}) error {
   259  	var (
   260  		nss        = value.(rules.Namespaces)
   261  		version    = nss.Version()
   262  		namespaces = nss.Namespaces()
   263  		incoming   = newRuleNamespacesMap(ruleNamespacesMapOptions{
   264  			InitialSize: len(namespaces),
   265  		})
   266  	)
   267  	for _, ns := range namespaces {
   268  		incoming.Set(ns.Name(), rulesNamespace(ns))
   269  	}
   270  
   271  	n.Lock()
   272  	defer n.Unlock()
   273  
   274  	var (
   275  		watchWg  sync.WaitGroup
   276  		multiErr xerrors.MultiError
   277  		errLock  sync.Mutex
   278  	)
   279  
   280  	for _, entry := range incoming.Iter() {
   281  		namespace, elem := entry.Key(), rules.Namespace(entry.Value())
   282  		nsName, snapshots := elem.Name(), elem.Snapshots()
   283  		ruleSet, exists := n.rules.Get(namespace)
   284  		if !exists {
   285  			instrumentOpts := n.opts.InstrumentOptions()
   286  			ruleSetScope := instrumentOpts.MetricsScope().SubScope("ruleset")
   287  			ruleSetOpts := n.opts.SetInstrumentOptions(instrumentOpts.SetMetricsScope(ruleSetScope))
   288  			ruleSetKey := n.ruleSetKeyFn(elem.Name())
   289  			ruleSet = newRuleSet(nsName, ruleSetKey, ruleSetOpts)
   290  			n.rules.Set(namespace, ruleSet)
   291  			n.metrics.added.Inc(1)
   292  		}
   293  
   294  		shouldWatch := true
   295  		// This should never happen but just to be on the defensive side.
   296  		if len(snapshots) == 0 {
   297  			n.log.Warn("namespace updates have no snapshots", zap.Int("version", version))
   298  		} else {
   299  			latestSnapshot := snapshots[len(snapshots)-1]
   300  			// If the latest update shows the namespace is tombstoned, and we
   301  			// have received the corresponding ruleset update, we can stop watching
   302  			// the ruleset updates.
   303  			if latestSnapshot.Tombstoned() && latestSnapshot.ForRuleSetVersion() == ruleSet.Version() {
   304  				shouldWatch = false
   305  			}
   306  		}
   307  
   308  		if !shouldWatch {
   309  			n.metrics.unwatched.Inc(1)
   310  			ruleSet.Unwatch()
   311  		} else {
   312  			n.metrics.watched.Inc(1)
   313  
   314  			watchWg.Add(1)
   315  			go func() {
   316  				// Start the watches in background goroutines so that if the store is unavailable they timeout
   317  				// (approximately) in unison. This prevents the timeouts from stacking on top of each
   318  				// other when the store is unavailable and causing a delay of timeout_duration * num_rules.
   319  				defer watchWg.Done()
   320  
   321  				if err := ruleSet.Watch(); err != nil {
   322  					n.metrics.watchErrors.Inc(1)
   323  					n.log.Error("failed to watch ruleset updates",
   324  						zap.String("ruleSetKey", ruleSet.Key()),
   325  						zap.Error(err))
   326  
   327  					// Track errors if we explicitly want to ensure watches succeed.
   328  					if n.requireNamespaceWatchOnInit {
   329  						errLock.Lock()
   330  						multiErr = multiErr.Add(err)
   331  						errLock.Unlock()
   332  					}
   333  				}
   334  			}()
   335  		}
   336  
   337  		if !exists && n.onNamespaceAddedFn != nil {
   338  			n.onNamespaceAddedFn(nsName, ruleSet)
   339  		}
   340  	}
   341  
   342  	watchWg.Wait()
   343  
   344  	if !multiErr.Empty() {
   345  		return multiErr.FinalError()
   346  	}
   347  
   348  	for _, entry := range n.rules.Iter() {
   349  		namespace, ruleSet := entry.Key(), entry.Value()
   350  		_, exists := incoming.Get(namespace)
   351  		if exists {
   352  			continue
   353  		}
   354  		// Process the namespaces not in the incoming update.
   355  		earliestNanos := n.nowFn().Add(-n.matchRangePast).UnixNano()
   356  		if ruleSet.Tombstoned() && ruleSet.CutoverNanos() <= earliestNanos {
   357  			if n.onNamespaceRemovedFn != nil {
   358  				n.onNamespaceRemovedFn(ruleSet.Namespace())
   359  			}
   360  			n.rules.Delete(namespace)
   361  			ruleSet.Unwatch()
   362  			n.metrics.unwatched.Inc(1)
   363  		}
   364  	}
   365  
   366  	return nil
   367  }