// Origin: github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/shipper/indexgateway/ringmanager.go

package indexgateway

import (
	"context"
	"fmt"
	"net/http"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/kv"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"

	loki_util "github.com/grafana/loki/pkg/util"
)

const (
	// ringAutoForgetUnhealthyPeriods is multiplied by the configured ring heartbeat
	// timeout to obtain the period handed to the auto-forget lifecycler delegate.
	ringAutoForgetUnhealthyPeriods = 10
	// ringNameForServer is the ring name passed when creating both the ring client
	// and the ring lifecycler.
	ringNameForServer = "index-gateway"
	// ringNumTokens is the token count passed when building the lifecycler config.
	ringNumTokens = 128
	// ringCheckPeriod is the period of the ticker used by the server-mode running loop.
	ringCheckPeriod = 3 * time.Second

	// RingIdentifier is used as a unique name to register the Index Gateway ring.
	RingIdentifier = "index-gateway"

	// RingKey is the name of the key used to register the different Index Gateway instances in the key-value store.
	RingKey = "index-gateway"
)

// ManagerMode defines the different modes for the RingManager to execute.
//
// The RingManager and its modes are only relevant if the IndexGateway is running in ring mode.
type ManagerMode int

const (
	// ClientMode is the RingManager mode executed by Loki components that are clients of the IndexGateway.
	// The RingManager in client mode will have its own ring key-value store but it won't try to register itself in the ring.
	ClientMode ManagerMode = iota

	// ServerMode is the RingManager mode executed by the IndexGateway.
	// The RingManager in server mode will register itself in the ring.
	ServerMode
)

// RingManager is a component instantiated before all the others and is responsible for the ring setup.
//
// All Loki components that are involved with the IndexGateway (including the IndexGateway itself) will
// require a RingManager. However, the components that are clients of the IndexGateway will run it in client
// mode while the IndexGateway itself will run the manager in server mode.
53 type RingManager struct { 54 services.Service 55 56 subservices *services.Manager 57 subservicesWatcher *services.FailureWatcher 58 59 RingLifecycler *ring.BasicLifecycler 60 Ring *ring.Ring 61 managerMode ManagerMode 62 63 cfg Config 64 65 log log.Logger 66 } 67 68 // NewRingManager is the recommended way of instantiating a RingManager. 69 // 70 // The other functions will assume the RingManager was instantiated through this function. 71 func NewRingManager(managerMode ManagerMode, cfg Config, log log.Logger, registerer prometheus.Registerer) (*RingManager, error) { 72 rm := &RingManager{ 73 cfg: cfg, log: log, managerMode: managerMode, 74 } 75 76 if cfg.Mode != RingMode { 77 return nil, fmt.Errorf("ring manager shouldn't be invoked when index gateway not in ring mode") 78 } 79 80 // instantiate kv store for both modes. 81 ringStore, err := kv.NewClient( 82 rm.cfg.Ring.KVStore, 83 ring.GetCodec(), 84 kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("loki_", registerer), "index-gateway-ring-manager"), 85 rm.log, 86 ) 87 if err != nil { 88 return nil, errors.Wrap(err, "index gateway ring manager create KV store client") 89 } 90 91 // instantiate ring for both mode modes. 
92 ringCfg := rm.cfg.Ring.ToRingConfig(rm.cfg.Ring.ReplicationFactor) 93 rm.Ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, ringNameForServer, RingKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("loki_", registerer), rm.log) 94 if err != nil { 95 return nil, errors.Wrap(err, "index gateway ring manager create ring client") 96 } 97 98 if managerMode == ServerMode { 99 if err := rm.startServerMode(ringStore, registerer); err != nil { 100 return nil, err 101 } 102 return rm, nil 103 } 104 105 if err := rm.startClientMode(); err != nil { 106 return nil, err 107 } 108 return rm, nil 109 } 110 111 func (rm *RingManager) startServerMode(ringStore kv.Client, registerer prometheus.Registerer) error { 112 lifecyclerCfg, err := rm.cfg.Ring.ToLifecyclerConfig(ringNumTokens, rm.log) 113 if err != nil { 114 return errors.Wrap(err, "invalid ring lifecycler config") 115 } 116 117 delegate := ring.BasicLifecyclerDelegate(rm) 118 delegate = ring.NewLeaveOnStoppingDelegate(delegate, rm.log) 119 delegate = ring.NewTokensPersistencyDelegate(rm.cfg.Ring.TokensFilePath, ring.JOINING, delegate, rm.log) 120 delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*rm.cfg.Ring.HeartbeatTimeout, delegate, rm.log) 121 122 rm.RingLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, ringNameForServer, RingKey, ringStore, delegate, rm.log, registerer) 123 if err != nil { 124 return errors.Wrap(err, "index gateway ring manager create ring lifecycler") 125 } 126 127 svcs := []services.Service{rm.RingLifecycler, rm.Ring} 128 rm.subservices, err = services.NewManager(svcs...) 
129 if err != nil { 130 return errors.Wrap(err, "new index gateway services manager in server mode") 131 } 132 133 rm.subservicesWatcher = services.NewFailureWatcher() 134 rm.subservicesWatcher.WatchManager(rm.subservices) 135 rm.Service = services.NewBasicService(rm.starting, rm.running, rm.stopping) 136 137 return nil 138 } 139 140 func (rm *RingManager) startClientMode() error { 141 var err error 142 143 svcs := []services.Service{rm.Ring} 144 rm.subservices, err = services.NewManager(svcs...) 145 if err != nil { 146 return errors.Wrap(err, "new index gateway services manager in client mode") 147 } 148 149 rm.subservicesWatcher = services.NewFailureWatcher() 150 rm.subservicesWatcher.WatchManager(rm.subservices) 151 152 rm.Service = services.NewIdleService(func(ctx context.Context) error { 153 return services.StartManagerAndAwaitHealthy(ctx, rm.subservices) 154 }, func(failureCase error) error { 155 return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices) 156 }) 157 158 return nil 159 } 160 161 // starting implements the Lifecycler interface and is one of the lifecycle hooks. 162 func (rm *RingManager) starting(ctx context.Context) (err error) { 163 // In case this function will return error we want to unregister the instance 164 // from the ring. We do it ensuring dependencies are gracefully stopped if they 165 // were already started. 
166 defer func() { 167 if err == nil || rm.subservices == nil { 168 return 169 } 170 171 if stopErr := services.StopManagerAndAwaitStopped(context.Background(), rm.subservices); stopErr != nil { 172 level.Error(rm.log).Log("msg", "failed to gracefully stop index gateway ring manager dependencies", "err", stopErr) 173 } 174 }() 175 176 if err := services.StartManagerAndAwaitHealthy(ctx, rm.subservices); err != nil { 177 return errors.Wrap(err, "unable to start index gateway ring manager subservices") 178 } 179 180 // The BasicLifecycler does not automatically move state to ACTIVE such that any additional work that 181 // someone wants to do can be done before becoming ACTIVE. For the index gateway we don't currently 182 // have any additional work so we can become ACTIVE right away. 183 // Wait until the ring client detected this instance in the JOINING 184 // state to make sure that when we'll run the initial sync we already 185 // know the tokens assigned to this instance. 186 level.Info(rm.log).Log("msg", "waiting until index gateway is JOINING in the ring") 187 if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.JOINING); err != nil { 188 return err 189 } 190 level.Info(rm.log).Log("msg", "index gateway is JOINING in the ring") 191 192 if err = rm.RingLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil { 193 return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE) 194 } 195 196 // Wait until the ring client detected this instance in the ACTIVE state to 197 // make sure that when we'll run the loop it won't be detected as a ring 198 // topology change. 
199 level.Info(rm.log).Log("msg", "waiting until index gateway is ACTIVE in the ring") 200 if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.ACTIVE); err != nil { 201 return err 202 } 203 level.Info(rm.log).Log("msg", "index gateway is ACTIVE in the ring") 204 205 return nil 206 } 207 208 // running implements the Lifecycler interface and is one of the lifecycle hooks. 209 func (rm *RingManager) running(ctx context.Context) error { 210 t := time.NewTicker(ringCheckPeriod) 211 defer t.Stop() 212 for { 213 select { 214 case <-ctx.Done(): 215 return nil 216 case err := <-rm.subservicesWatcher.Chan(): 217 return errors.Wrap(err, "running index gateway ring manager subservice failed") 218 case <-t.C: 219 continue 220 } 221 } 222 } 223 224 // stopping implements the Lifecycler interface and is one of the lifecycle hooks. 225 func (rm *RingManager) stopping(_ error) error { 226 level.Debug(rm.log).Log("msg", "stopping index gateway ring manager") 227 return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices) 228 } 229 230 // IndexGatewayOwnsTenant dictates if a given tenant should be ignored by an IndexGateway or not. 231 // 232 // It fallbacks to true so that the IndexGateway will only skip tenants if it is certain of that. 233 // This implementation relies on the tokens assigned to an IndexGateway instance to define if a tenant 234 // is assigned or not. 235 func (rm *RingManager) IndexGatewayOwnsTenant(tenant string) bool { 236 if rm.cfg.Mode != RingMode { 237 return true 238 } 239 240 if rm.managerMode == ClientMode { 241 level.Error(rm.log).Log("msg", "ring manager in client mode doesn't support tenant in boundaries interface") 242 return true 243 } 244 245 return loki_util.IsAssignedKey(rm.Ring, rm.RingLifecycler.GetInstanceAddr(), tenant) 246 } 247 248 // ServeHTTP serves the HTTP route /indexgateway/ring. 
249 func (rm *RingManager) ServeHTTP(w http.ResponseWriter, req *http.Request) { 250 if rm.cfg.Mode == RingMode { 251 rm.Ring.ServeHTTP(w, req) 252 } else { 253 _, _ = w.Write([]byte("IndexGateway running with 'useIndexGatewayRing' disabled.")) 254 } 255 }