github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ruler/storage/instance/manager.go (about) 1 // This directory was copied and adapted from https://github.com/grafana/agent/tree/main/pkg/metrics. 2 // We cannot vendor the agent in since the agent vendors loki in, which would cause a cyclic dependency. 3 // NOTE: many changes have been made to the original code for our use-case. 4 package instance 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "sync" 11 "time" 12 13 "github.com/go-kit/log" 14 "github.com/go-kit/log/level" 15 "github.com/prometheus/prometheus/storage" 16 17 util_log "github.com/grafana/loki/pkg/util/log" 18 ) 19 20 var ( 21 // DefaultBasicManagerConfig is the default config for the BasicManager. 22 DefaultBasicManagerConfig = BasicManagerConfig{ 23 InstanceRestartBackoff: 5 * time.Second, 24 } 25 ) 26 27 // Manager represents a set of methods for manipulating running instances at 28 // runtime. 29 type Manager interface { 30 // GetInstance retrieves a ManagedInstance by name. 31 GetInstance(name string) (ManagedInstance, error) 32 33 // ListInstances returns all currently managed instances running 34 // within the Manager. The key will be the instance name from their config. 35 ListInstances() map[string]ManagedInstance 36 37 // ListConfigs returns the config objects associated with a managed 38 // instance. The key will be the Name field from Config. 39 ListConfigs() map[string]Config 40 41 // ApplyConfig creates a new Config or updates an existing Config if 42 // one with Config.Name already exists. 43 ApplyConfig(Config) error 44 45 // DeleteConfig deletes a given managed instance based on its Config.Name. 46 DeleteConfig(name string) error 47 48 // Ready indicates if all instances are ready for processing. 49 Ready() bool 50 51 // InstanceReady indicates if an instance is ready for processing. 52 InstanceReady(name string) bool 53 54 // Stop stops the Manager and all managed instances. 55 Stop() 56 } 57 58 // ManagedInstance is implemented by Instance. It is defined as an interface 59 // for the sake of testing from Manager implementations. 60 type ManagedInstance interface { 61 Ready() bool 62 Run(ctx context.Context) error 63 Update(c Config) error 64 StorageDirectory() string 65 Appender(ctx context.Context) storage.Appender 66 Stop() error 67 Tenant() string 68 } 69 70 // BasicManagerConfig controls the operations of a BasicManager. 71 type BasicManagerConfig struct { 72 InstanceRestartBackoff time.Duration 73 } 74 75 // BasicManager creates a new BasicManager, implementing the Manager interface. 76 // BasicManager will directly launch instances and perform no extra processing. 77 // 78 // Other implementations of Manager usually wrap a BasicManager. 79 type BasicManager struct { 80 cfgMut sync.Mutex 81 cfg BasicManagerConfig 82 logger log.Logger 83 metrics *Metrics 84 85 // Take care when locking mut: if you hold onto a lock of mut while calling 86 // Stop on a process, you will deadlock. 87 mut sync.Mutex 88 processes map[string]*managedProcess 89 90 launch Factory 91 } 92 93 // managedProcess represents a goroutine running a ManagedInstance. cancel 94 // requests that the goroutine should shutdown. done will be closed after the 95 // goroutine exists. 96 type managedProcess struct { 97 cfg Config 98 inst ManagedInstance 99 cancel context.CancelFunc 100 done chan bool 101 } 102 103 func (p managedProcess) Stop() { 104 if p.inst.Ready() { // Only stop initialized instances to avoid panic 105 if err := p.inst.Stop(); err != nil { 106 level.Error(util_log.Logger).Log("msg", "error while stopping instance", "user", p.inst.Tenant(), "err", err) 107 } 108 } 109 110 p.cancel() 111 <-p.done 112 } 113 114 // Factory should return an unstarted instance given some config. 115 type Factory func(c Config) (ManagedInstance, error) 116 117 // NewBasicManager creates a new BasicManager. The launch function will be 118 // invoked any time a new Config is applied. 119 // 120 // The lifecycle of any ManagedInstance returned by the launch function will 121 // be handled by the BasicManager. Instances will be automatically restarted 122 // if stopped, updated if the config changes, or removed when the Config is 123 // deleted. 124 func NewBasicManager(cfg BasicManagerConfig, metrics *Metrics, logger log.Logger, launch Factory) *BasicManager { 125 return &BasicManager{ 126 cfg: cfg, 127 metrics: metrics, 128 logger: logger, 129 processes: make(map[string]*managedProcess), 130 launch: launch, 131 } 132 } 133 134 // UpdateManagerConfig updates the BasicManagerConfig. 135 func (m *BasicManager) UpdateManagerConfig(c BasicManagerConfig) { 136 m.cfgMut.Lock() 137 defer m.cfgMut.Unlock() 138 m.cfg = c 139 } 140 141 // GetInstance returns the given instance by name. 142 func (m *BasicManager) GetInstance(name string) (ManagedInstance, error) { 143 m.mut.Lock() 144 defer m.mut.Unlock() 145 146 process, ok := m.processes[name] 147 if !ok { 148 return nil, fmt.Errorf("instance %s does not exist", name) 149 } 150 return process.inst, nil 151 } 152 153 // ListInstances returns the current active instances managed by BasicManager. 154 func (m *BasicManager) ListInstances() map[string]ManagedInstance { 155 m.mut.Lock() 156 defer m.mut.Unlock() 157 158 res := make(map[string]ManagedInstance, len(m.processes)) 159 for name, process := range m.processes { 160 res[name] = process.inst 161 } 162 return res 163 } 164 165 // ListConfigs lists the current active configs managed by BasicManager. 166 func (m *BasicManager) ListConfigs() map[string]Config { 167 m.mut.Lock() 168 defer m.mut.Unlock() 169 170 res := make(map[string]Config, len(m.processes)) 171 for name, process := range m.processes { 172 res[name] = process.cfg 173 } 174 return res 175 } 176 177 // ApplyConfig takes a Config and either starts a new managed instance or 178 // updates an existing managed instance. The value for Name in c is used to 179 // uniquely identify the Config and determine whether the Config has an 180 // existing associated managed instance. 181 func (m *BasicManager) ApplyConfig(c Config) error { 182 m.mut.Lock() 183 defer m.mut.Unlock() 184 185 // If the config already exists, we need to update it. 186 proc, ok := m.processes[c.Name] 187 if ok { 188 err := proc.inst.Update(c) 189 190 // If the instance could not be dynamically updated, we need to force the 191 // update by restarting it. If it failed for another reason, something 192 // serious went wrong and we'll completely give up without stopping the 193 // existing job. 194 if errors.Is(err, ErrInvalidUpdate{}) { 195 level.Info(m.logger).Log("msg", "could not dynamically update instance, will manually restart", "instance", c.Name, "reason", err) 196 197 // NOTE: we don't return here; we fall through to spawn the new instance. 198 proc.Stop() 199 } else if err != nil { 200 return fmt.Errorf("failed to update instance %s: %w", c.Name, err) 201 } else { 202 level.Info(m.logger).Log("msg", "dynamically updated instance", "instance", c.Name) 203 204 proc.cfg = c 205 return nil 206 } 207 } 208 209 // Spawn a new process for the new config. 210 err := m.spawnProcess(c) 211 if err != nil { 212 return err 213 } 214 215 m.metrics.RunningInstances.Inc() 216 return nil 217 } 218 219 func (m *BasicManager) spawnProcess(c Config) error { 220 inst, err := m.launch(c) 221 if err != nil { 222 return err 223 } 224 225 ctx, cancel := context.WithCancel(context.Background()) 226 done := make(chan bool) 227 228 proc := &managedProcess{ 229 cancel: cancel, 230 done: done, 231 cfg: c, 232 inst: inst, 233 } 234 m.processes[c.Name] = proc 235 236 go func() { 237 m.runProcess(ctx, c.Name, inst) 238 close(done) 239 240 // Now that the process has stopped, we can remove it from our managed 241 // list. 242 // 243 // However, it's possible that a new Config may have been applied and 244 // overwrote the initial value in our map. We only want to delete the 245 // process from the map if it hasn't changed from what we initially 246 // set it to. 247 // 248 // We only use the instance for comparing (which will never change) because 249 // the instance may have dynamically been given a new config since this 250 // goroutine started. 251 m.mut.Lock() 252 if storedProc, exist := m.processes[c.Name]; exist && storedProc.inst == inst { 253 delete(m.processes, c.Name) 254 } 255 m.mut.Unlock() 256 257 m.metrics.RunningInstances.Dec() 258 }() 259 260 return nil 261 } 262 263 // runProcess runs and instance and keeps it alive until it is explicitly stopped 264 // by cancelling the context. 265 func (m *BasicManager) runProcess(ctx context.Context, name string, inst ManagedInstance) { 266 for { 267 err := inst.Run(ctx) 268 if err != nil && err != context.Canceled { 269 backoff := m.instanceRestartBackoff() 270 271 m.metrics.AbnormalExits.WithLabelValues(name).Inc() 272 level.Error(m.logger).Log("msg", "instance stopped abnormally, restarting after backoff period", "err", err, "backoff", backoff, "instance", name) 273 time.Sleep(backoff) 274 } else { 275 level.Info(m.logger).Log("msg", "stopped instance", "instance", name) 276 break 277 } 278 } 279 } 280 281 func (m *BasicManager) instanceRestartBackoff() time.Duration { 282 m.cfgMut.Lock() 283 defer m.cfgMut.Unlock() 284 return m.cfg.InstanceRestartBackoff 285 } 286 287 // DeleteConfig removes a managed instance by its config name. Returns an error 288 // if there is no such managed instance with the given name. 289 func (m *BasicManager) DeleteConfig(name string) error { 290 m.mut.Lock() 291 proc, ok := m.processes[name] 292 if !ok { 293 m.mut.Unlock() 294 return errors.New("config does not exist") 295 } 296 m.mut.Unlock() 297 298 // spawnProcess is responsible for removing the process from the map after it 299 // stops so we don't need to delete anything from m.processes here. 300 proc.Stop() 301 return nil 302 } 303 304 // Ready indicates if all instances are ready for processing. 305 func (m *BasicManager) Ready() bool { 306 m.mut.Lock() 307 defer m.mut.Unlock() 308 309 for _, process := range m.processes { 310 if process.inst == nil { 311 return false 312 } 313 314 if !process.inst.Ready() { 315 return false 316 } 317 } 318 319 return true 320 } 321 322 // InstanceReady indicates if an instance is ready for processing. 323 func (m *BasicManager) InstanceReady(name string) bool { 324 inst, err := m.GetInstance(name) 325 if err != nil { 326 return false 327 } 328 329 return inst.Ready() 330 } 331 332 // Stop stops the BasicManager and stops all active processes for configs. 333 func (m *BasicManager) Stop() { 334 var wg sync.WaitGroup 335 336 // We don't need to change m.processes here; processes remove themselves 337 // from the map (in spawnProcess). 338 m.mut.Lock() 339 wg.Add(len(m.processes)) 340 for _, proc := range m.processes { 341 go func(proc *managedProcess) { 342 proc.Stop() 343 wg.Done() 344 }(proc) 345 } 346 m.mut.Unlock() 347 348 wg.Wait() 349 } 350 351 // MockManager exposes methods of the Manager interface as struct fields. 352 // Useful for tests. 353 type MockManager struct { 354 GetInstanceFunc func(name string) (ManagedInstance, error) 355 ListInstancesFunc func() map[string]ManagedInstance 356 ListConfigsFunc func() map[string]Config 357 ApplyConfigFunc func(Config) error 358 DeleteConfigFunc func(name string) error 359 StopFunc func() 360 } 361 362 func (m MockManager) Ready() bool { 363 return true 364 } 365 366 func (m MockManager) InstanceReady(name string) bool { 367 return true 368 } 369 370 // GetInstance implements Manager. 371 func (m MockManager) GetInstance(name string) (ManagedInstance, error) { 372 if m.GetInstanceFunc != nil { 373 return m.GetInstanceFunc(name) 374 } 375 panic("GetInstanceFunc not implemented") 376 } 377 378 // ListInstances implements Manager. 379 func (m MockManager) ListInstances() map[string]ManagedInstance { 380 if m.ListInstancesFunc != nil { 381 return m.ListInstancesFunc() 382 } 383 panic("ListInstancesFunc not implemented") 384 } 385 386 // ListConfigs implements Manager. 387 func (m MockManager) ListConfigs() map[string]Config { 388 if m.ListConfigsFunc != nil { 389 return m.ListConfigsFunc() 390 } 391 panic("ListConfigsFunc not implemented") 392 } 393 394 // ApplyConfig implements Manager. 395 func (m MockManager) ApplyConfig(c Config) error { 396 if m.ApplyConfigFunc != nil { 397 return m.ApplyConfigFunc(c) 398 } 399 panic("ApplyConfigFunc not implemented") 400 } 401 402 // DeleteConfig implements Manager. 403 func (m MockManager) DeleteConfig(name string) error { 404 if m.DeleteConfigFunc != nil { 405 return m.DeleteConfigFunc(name) 406 } 407 panic("DeleteConfigFunc not implemented") 408 } 409 410 // Stop implements Manager. 411 func (m MockManager) Stop() { 412 if m.StopFunc != nil { 413 m.StopFunc() 414 return 415 } 416 panic("StopFunc not implemented") 417 }