github.com/m3db/m3@v1.5.0/src/metrics/matcher/namespaces.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package matcher 22 23 import ( 24 "errors" 25 "sync" 26 "time" 27 28 "github.com/m3db/m3/src/cluster/kv" 29 "github.com/m3db/m3/src/cluster/kv/util/runtime" 30 "github.com/m3db/m3/src/metrics/aggregation" 31 "github.com/m3db/m3/src/metrics/generated/proto/rulepb" 32 "github.com/m3db/m3/src/metrics/matcher/namespace" 33 "github.com/m3db/m3/src/metrics/metric" 34 "github.com/m3db/m3/src/metrics/metric/id" 35 "github.com/m3db/m3/src/metrics/rules" 36 "github.com/m3db/m3/src/metrics/rules/view" 37 "github.com/m3db/m3/src/x/clock" 38 xerrors "github.com/m3db/m3/src/x/errors" 39 xos "github.com/m3db/m3/src/x/os" 40 "github.com/m3db/m3/src/x/watch" 41 42 "github.com/uber-go/tally" 43 "go.uber.org/zap" 44 ) 45 46 var ( 47 emptyNamespaces rules.Namespaces 48 errNilValue = errors.New("nil value received") 49 ) 50 51 // Namespaces manages runtime updates to registered namespaces and provides 52 // API to match metic ids against rules in the corresponding namespaces. 53 type Namespaces interface { 54 rules.ActiveSet 55 // Open opens the namespaces and starts watching runtime rule updates 56 Open() error 57 58 // Version returns the current version for a given namespace. 59 Version(namespace []byte) int 60 61 // Close closes the namespaces. 62 Close() 63 } 64 65 type rulesNamespace rules.Namespace 66 67 type namespacesMetrics struct { 68 notExists tally.Counter 69 added tally.Counter 70 removed tally.Counter 71 watched tally.Counter 72 watchErrors tally.Counter 73 unwatched tally.Counter 74 createWatchErrors tally.Counter 75 initWatchErrors tally.Counter 76 } 77 78 func newNamespacesMetrics(scope tally.Scope) namespacesMetrics { 79 return namespacesMetrics{ 80 notExists: scope.Counter("not-exists"), 81 added: scope.Counter("added"), 82 removed: scope.Counter("removed"), 83 watched: scope.Counter("watched"), 84 watchErrors: scope.Counter("watch-errors"), 85 unwatched: scope.Counter("unwatched"), 86 createWatchErrors: scope.Counter("create-watch-errors"), 87 initWatchErrors: scope.Counter("init-watch-errors"), 88 } 89 } 90 91 // namespaces contains the list of namespace users have defined rules for. 92 type namespaces struct { 93 sync.RWMutex 94 runtime.Value 95 96 key string 97 store kv.Store 98 opts Options 99 nowFn clock.NowFn 100 log *zap.Logger 101 ruleSetKeyFn RuleSetKeyFn 102 matchRangePast time.Duration 103 onNamespaceAddedFn OnNamespaceAddedFn 104 onNamespaceRemovedFn OnNamespaceRemovedFn 105 106 proto *rulepb.Namespaces 107 rules *namespaceRuleSetsMap 108 metrics namespacesMetrics 109 nsResolver namespace.Resolver 110 requireNamespaceWatchOnInit bool 111 } 112 113 // NewNamespaces creates a new namespaces object. 114 func NewNamespaces(key string, opts Options) Namespaces { 115 instrumentOpts := opts.InstrumentOptions() 116 n := &namespaces{ 117 key: key, 118 store: opts.KVStore(), 119 opts: opts, 120 nowFn: opts.ClockOptions().NowFn(), 121 log: instrumentOpts.Logger(), 122 ruleSetKeyFn: opts.RuleSetKeyFn(), 123 matchRangePast: opts.MatchRangePast(), 124 onNamespaceAddedFn: opts.OnNamespaceAddedFn(), 125 onNamespaceRemovedFn: opts.OnNamespaceRemovedFn(), 126 proto: &rulepb.Namespaces{}, 127 rules: newNamespaceRuleSetsMap(namespaceRuleSetsMapOptions{}), 128 metrics: newNamespacesMetrics(instrumentOpts.MetricsScope()), 129 requireNamespaceWatchOnInit: opts.RequireNamespaceWatchOnInit(), 130 nsResolver: opts.NamespaceResolver(), 131 } 132 valueOpts := runtime.NewOptions(). 133 SetInstrumentOptions(instrumentOpts). 134 SetInitWatchTimeout(opts.InitWatchTimeout()). 135 SetKVStore(n.store). 136 SetUnmarshalFn(n.toNamespaces). 137 SetProcessFn(n.process). 138 SetInterruptedCh(opts.InterruptedCh()) 139 n.Value = runtime.NewValue(key, valueOpts) 140 return n 141 } 142 143 func (n *namespaces) Open() error { 144 err := n.Watch() 145 var interruptErr *xos.InterruptError 146 if err == nil { 147 return nil 148 } else if errors.As(err, &interruptErr) { 149 return err 150 } 151 152 errCreateWatch, ok := err.(watch.CreateWatchError) 153 if ok { 154 n.metrics.createWatchErrors.Inc(1) 155 return errCreateWatch 156 } 157 // NB(xichen): we managed to watch the key but weren't able 158 // to initialize the value. In this case, log the error instead 159 // to be more resilient to error conditions preventing process 160 // from starting up. 161 n.metrics.initWatchErrors.Inc(1) 162 if n.requireNamespaceWatchOnInit { 163 return err 164 } 165 166 n.opts.InstrumentOptions().Logger().With( 167 zap.String("key", n.key), 168 zap.Error(err), 169 ).Error("error initializing namespaces values, retrying in the background") 170 171 return nil 172 } 173 174 func (n *namespaces) Version(namespace []byte) int { 175 n.RLock() 176 ruleSet, exists := n.rules.Get(namespace) 177 n.RUnlock() 178 if !exists { 179 return kv.UninitializedVersion 180 } 181 return ruleSet.Version() 182 } 183 184 func (n *namespaces) LatestRollupRules(namespace []byte, timeNanos int64) ([]view.RollupRule, error) { 185 ruleSet, exists := n.ruleSet(namespace) 186 if !exists { 187 return nil, errors.New("ruleset not found for namespace") 188 } 189 190 return ruleSet.LatestRollupRules(namespace, timeNanos) 191 } 192 193 func (n *namespaces) ForwardMatch(id id.ID, fromNanos, toNanos int64, 194 opts rules.MatchOptions) (rules.MatchResult, error) { 195 namespace := n.nsResolver.Resolve(id) 196 ruleSet, exists := n.ruleSet(namespace) 197 if !exists { 198 return rules.EmptyMatchResult, nil 199 } 200 return ruleSet.ForwardMatch(id, fromNanos, toNanos, opts) 201 } 202 203 func (n *namespaces) ReverseMatch( 204 id id.ID, 205 fromNanos, toNanos int64, 206 mt metric.Type, 207 at aggregation.Type, 208 isMultiAggregationTypesAllowed bool, 209 aggTypesOpts aggregation.TypesOptions, 210 ) (rules.MatchResult, error) { 211 namespace := n.nsResolver.Resolve(id) 212 ruleSet, exists := n.ruleSet(namespace) 213 if !exists { 214 return rules.EmptyMatchResult, nil 215 } 216 return ruleSet.ReverseMatch(id, fromNanos, toNanos, mt, at, isMultiAggregationTypesAllowed, aggTypesOpts) 217 } 218 219 func (n *namespaces) ruleSet(namespace []byte) (RuleSet, bool) { 220 n.RLock() 221 ruleSet, exists := n.rules.Get(namespace) 222 n.RUnlock() 223 if !exists { 224 n.metrics.notExists.Inc(1) 225 } 226 return ruleSet, exists 227 } 228 229 func (n *namespaces) Close() { 230 // NB(xichen): we stop watching the value outside lock because otherwise we might 231 // be holding the namespace lock while attempting to acquire the value lock, and 232 // the updating goroutine might be holding the value lock and attempting to 233 // acquire the namespace lock, causing a deadlock. 234 n.Value.Unwatch() 235 236 n.RLock() 237 for _, entry := range n.rules.Iter() { 238 rs := entry.Value() 239 rs.Unwatch() 240 } 241 n.RUnlock() 242 } 243 244 func (n *namespaces) toNamespaces(value kv.Value) (interface{}, error) { 245 n.Lock() 246 defer n.Unlock() 247 248 if value == nil { 249 return emptyNamespaces, errNilValue 250 } 251 n.proto.Reset() 252 if err := value.Unmarshal(n.proto); err != nil { 253 return emptyNamespaces, err 254 } 255 return rules.NewNamespaces(value.Version(), n.proto) 256 } 257 258 func (n *namespaces) process(value interface{}) error { 259 var ( 260 nss = value.(rules.Namespaces) 261 version = nss.Version() 262 namespaces = nss.Namespaces() 263 incoming = newRuleNamespacesMap(ruleNamespacesMapOptions{ 264 InitialSize: len(namespaces), 265 }) 266 ) 267 for _, ns := range namespaces { 268 incoming.Set(ns.Name(), rulesNamespace(ns)) 269 } 270 271 n.Lock() 272 defer n.Unlock() 273 274 var ( 275 watchWg sync.WaitGroup 276 multiErr xerrors.MultiError 277 errLock sync.Mutex 278 ) 279 280 for _, entry := range incoming.Iter() { 281 namespace, elem := entry.Key(), rules.Namespace(entry.Value()) 282 nsName, snapshots := elem.Name(), elem.Snapshots() 283 ruleSet, exists := n.rules.Get(namespace) 284 if !exists { 285 instrumentOpts := n.opts.InstrumentOptions() 286 ruleSetScope := instrumentOpts.MetricsScope().SubScope("ruleset") 287 ruleSetOpts := n.opts.SetInstrumentOptions(instrumentOpts.SetMetricsScope(ruleSetScope)) 288 ruleSetKey := n.ruleSetKeyFn(elem.Name()) 289 ruleSet = newRuleSet(nsName, ruleSetKey, ruleSetOpts) 290 n.rules.Set(namespace, ruleSet) 291 n.metrics.added.Inc(1) 292 } 293 294 shouldWatch := true 295 // This should never happen but just to be on the defensive side. 296 if len(snapshots) == 0 { 297 n.log.Warn("namespace updates have no snapshots", zap.Int("version", version)) 298 } else { 299 latestSnapshot := snapshots[len(snapshots)-1] 300 // If the latest update shows the namespace is tombstoned, and we 301 // have received the corresponding ruleset update, we can stop watching 302 // the ruleset updates. 303 if latestSnapshot.Tombstoned() && latestSnapshot.ForRuleSetVersion() == ruleSet.Version() { 304 shouldWatch = false 305 } 306 } 307 308 if !shouldWatch { 309 n.metrics.unwatched.Inc(1) 310 ruleSet.Unwatch() 311 } else { 312 n.metrics.watched.Inc(1) 313 314 watchWg.Add(1) 315 go func() { 316 // Start the watches in background goroutines so that if the store is unavailable they timeout 317 // (approximately) in unison. This prevents the timeouts from stacking on top of each 318 // other when the store is unavailable and causing a delay of timeout_duration * num_rules. 319 defer watchWg.Done() 320 321 if err := ruleSet.Watch(); err != nil { 322 n.metrics.watchErrors.Inc(1) 323 n.log.Error("failed to watch ruleset updates", 324 zap.String("ruleSetKey", ruleSet.Key()), 325 zap.Error(err)) 326 327 // Track errors if we explicitly want to ensure watches succeed. 328 if n.requireNamespaceWatchOnInit { 329 errLock.Lock() 330 multiErr = multiErr.Add(err) 331 errLock.Unlock() 332 } 333 } 334 }() 335 } 336 337 if !exists && n.onNamespaceAddedFn != nil { 338 n.onNamespaceAddedFn(nsName, ruleSet) 339 } 340 } 341 342 watchWg.Wait() 343 344 if !multiErr.Empty() { 345 return multiErr.FinalError() 346 } 347 348 for _, entry := range n.rules.Iter() { 349 namespace, ruleSet := entry.Key(), entry.Value() 350 _, exists := incoming.Get(namespace) 351 if exists { 352 continue 353 } 354 // Process the namespaces not in the incoming update. 355 earliestNanos := n.nowFn().Add(-n.matchRangePast).UnixNano() 356 if ruleSet.Tombstoned() && ruleSet.CutoverNanos() <= earliestNanos { 357 if n.onNamespaceRemovedFn != nil { 358 n.onNamespaceRemovedFn(ruleSet.Namespace()) 359 } 360 n.rules.Delete(namespace) 361 ruleSet.Unwatch() 362 n.metrics.unwatched.Inc(1) 363 } 364 } 365 366 return nil 367 }