github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/shipper/indexgateway/ringmanager.go (about)

     1  package indexgateway
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net/http"
     7  	"time"
     8  
     9  	"github.com/go-kit/log"
    10  	"github.com/go-kit/log/level"
    11  	"github.com/grafana/dskit/kv"
    12  	"github.com/grafana/dskit/ring"
    13  	"github.com/grafana/dskit/services"
    14  	"github.com/pkg/errors"
    15  	"github.com/prometheus/client_golang/prometheus"
    16  
    17  	loki_util "github.com/grafana/loki/pkg/util"
    18  )
    19  
const (
	// Internal ring settings used by the RingManager in this file.
	//
	// ringAutoForgetUnhealthyPeriods is multiplied by the configured ring heartbeat
	// timeout to obtain the period handed to the lifecycler's auto-forget delegate.
	// ringNameForServer is the ring name passed to both the ring client and the
	// ring lifecycler. ringNumTokens is the token count used to build the lifecycler
	// config. ringCheckPeriod is the tick interval of the server-mode running loop.
	ringAutoForgetUnhealthyPeriods = 10
	ringNameForServer              = "index-gateway"
	ringNumTokens                  = 128
	ringCheckPeriod                = 3 * time.Second

	// RingIdentifier is used as a unique name to register the Index Gateway ring.
	RingIdentifier = "index-gateway"

	// RingKey is the name of the key used to register the different Index Gateway instances in the key-value store.
	RingKey = "index-gateway"
)
    32  
// ManagerMode defines the different modes in which the RingManager can be executed.
//
// The RingManager and its modes are only relevant if the IndexGateway is running in ring mode.
type ManagerMode int
    37  
const (
	// ClientMode is the RingManager mode executed by Loki components that are clients of the IndexGateway.
	// The RingManager in client mode will have its own ring key-value store but it won't try to register itself in the ring.
	ClientMode ManagerMode = iota

	// ServerMode is the RingManager mode executed by the IndexGateway.
	// The RingManager in server mode will register itself in the ring.
	ServerMode
)
    47  
// RingManager is a component instantiated before all the others and is responsible for the ring setup.
//
// All Loki components that are involved with the IndexGateway (including the IndexGateway itself) will
// require a RingManager. However, the components that are clients of the IndexGateway will run it in client
// mode while the IndexGateway itself will run the manager in server mode.
type RingManager struct {
	// Service is the lifecycle service of the manager itself. It is assigned by
	// startServerMode or startClientMode, depending on the manager mode.
	services.Service

	// subservices groups the services owned by the manager (the ring client and,
	// in server mode, the ring lifecycler); subservicesWatcher watches that group.
	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher

	// RingLifecycler is only populated in server mode; in client mode it stays nil.
	// Ring is the ring client and is created for both modes.
	// managerMode records the mode the manager was created with.
	RingLifecycler *ring.BasicLifecycler
	Ring           *ring.Ring
	managerMode    ManagerMode

	// cfg is the index gateway configuration the manager was created with.
	cfg Config

	// log is the logger used by the manager and handed to the ring components it creates.
	log log.Logger
}
    67  
    68  // NewRingManager is the recommended way of instantiating a RingManager.
    69  //
    70  // The other functions will assume the RingManager was instantiated through this function.
    71  func NewRingManager(managerMode ManagerMode, cfg Config, log log.Logger, registerer prometheus.Registerer) (*RingManager, error) {
    72  	rm := &RingManager{
    73  		cfg: cfg, log: log, managerMode: managerMode,
    74  	}
    75  
    76  	if cfg.Mode != RingMode {
    77  		return nil, fmt.Errorf("ring manager shouldn't be invoked when index gateway not in ring mode")
    78  	}
    79  
    80  	// instantiate kv store for both modes.
    81  	ringStore, err := kv.NewClient(
    82  		rm.cfg.Ring.KVStore,
    83  		ring.GetCodec(),
    84  		kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("loki_", registerer), "index-gateway-ring-manager"),
    85  		rm.log,
    86  	)
    87  	if err != nil {
    88  		return nil, errors.Wrap(err, "index gateway ring manager create KV store client")
    89  	}
    90  
    91  	// instantiate ring for both mode modes.
    92  	ringCfg := rm.cfg.Ring.ToRingConfig(rm.cfg.Ring.ReplicationFactor)
    93  	rm.Ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, ringNameForServer, RingKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("loki_", registerer), rm.log)
    94  	if err != nil {
    95  		return nil, errors.Wrap(err, "index gateway ring manager create ring client")
    96  	}
    97  
    98  	if managerMode == ServerMode {
    99  		if err := rm.startServerMode(ringStore, registerer); err != nil {
   100  			return nil, err
   101  		}
   102  		return rm, nil
   103  	}
   104  
   105  	if err := rm.startClientMode(); err != nil {
   106  		return nil, err
   107  	}
   108  	return rm, nil
   109  }
   110  
   111  func (rm *RingManager) startServerMode(ringStore kv.Client, registerer prometheus.Registerer) error {
   112  	lifecyclerCfg, err := rm.cfg.Ring.ToLifecyclerConfig(ringNumTokens, rm.log)
   113  	if err != nil {
   114  		return errors.Wrap(err, "invalid ring lifecycler config")
   115  	}
   116  
   117  	delegate := ring.BasicLifecyclerDelegate(rm)
   118  	delegate = ring.NewLeaveOnStoppingDelegate(delegate, rm.log)
   119  	delegate = ring.NewTokensPersistencyDelegate(rm.cfg.Ring.TokensFilePath, ring.JOINING, delegate, rm.log)
   120  	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*rm.cfg.Ring.HeartbeatTimeout, delegate, rm.log)
   121  
   122  	rm.RingLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, ringNameForServer, RingKey, ringStore, delegate, rm.log, registerer)
   123  	if err != nil {
   124  		return errors.Wrap(err, "index gateway ring manager create ring lifecycler")
   125  	}
   126  
   127  	svcs := []services.Service{rm.RingLifecycler, rm.Ring}
   128  	rm.subservices, err = services.NewManager(svcs...)
   129  	if err != nil {
   130  		return errors.Wrap(err, "new index gateway services manager in server mode")
   131  	}
   132  
   133  	rm.subservicesWatcher = services.NewFailureWatcher()
   134  	rm.subservicesWatcher.WatchManager(rm.subservices)
   135  	rm.Service = services.NewBasicService(rm.starting, rm.running, rm.stopping)
   136  
   137  	return nil
   138  }
   139  
   140  func (rm *RingManager) startClientMode() error {
   141  	var err error
   142  
   143  	svcs := []services.Service{rm.Ring}
   144  	rm.subservices, err = services.NewManager(svcs...)
   145  	if err != nil {
   146  		return errors.Wrap(err, "new index gateway services manager in client mode")
   147  	}
   148  
   149  	rm.subservicesWatcher = services.NewFailureWatcher()
   150  	rm.subservicesWatcher.WatchManager(rm.subservices)
   151  
   152  	rm.Service = services.NewIdleService(func(ctx context.Context) error {
   153  		return services.StartManagerAndAwaitHealthy(ctx, rm.subservices)
   154  	}, func(failureCase error) error {
   155  		return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices)
   156  	})
   157  
   158  	return nil
   159  }
   160  
   161  // starting implements the Lifecycler interface and is one of the lifecycle hooks.
   162  func (rm *RingManager) starting(ctx context.Context) (err error) {
   163  	// In case this function will return error we want to unregister the instance
   164  	// from the ring. We do it ensuring dependencies are gracefully stopped if they
   165  	// were already started.
   166  	defer func() {
   167  		if err == nil || rm.subservices == nil {
   168  			return
   169  		}
   170  
   171  		if stopErr := services.StopManagerAndAwaitStopped(context.Background(), rm.subservices); stopErr != nil {
   172  			level.Error(rm.log).Log("msg", "failed to gracefully stop index gateway ring manager dependencies", "err", stopErr)
   173  		}
   174  	}()
   175  
   176  	if err := services.StartManagerAndAwaitHealthy(ctx, rm.subservices); err != nil {
   177  		return errors.Wrap(err, "unable to start index gateway ring manager subservices")
   178  	}
   179  
   180  	// The BasicLifecycler does not automatically move state to ACTIVE such that any additional work that
   181  	// someone wants to do can be done before becoming ACTIVE. For the index gateway we don't currently
   182  	// have any additional work so we can become ACTIVE right away.
   183  	// Wait until the ring client detected this instance in the JOINING
   184  	// state to make sure that when we'll run the initial sync we already
   185  	// know the tokens assigned to this instance.
   186  	level.Info(rm.log).Log("msg", "waiting until index gateway is JOINING in the ring")
   187  	if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.JOINING); err != nil {
   188  		return err
   189  	}
   190  	level.Info(rm.log).Log("msg", "index gateway is JOINING in the ring")
   191  
   192  	if err = rm.RingLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil {
   193  		return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE)
   194  	}
   195  
   196  	// Wait until the ring client detected this instance in the ACTIVE state to
   197  	// make sure that when we'll run the loop it won't be detected as a ring
   198  	// topology change.
   199  	level.Info(rm.log).Log("msg", "waiting until index gateway is ACTIVE in the ring")
   200  	if err := ring.WaitInstanceState(ctx, rm.Ring, rm.RingLifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
   201  		return err
   202  	}
   203  	level.Info(rm.log).Log("msg", "index gateway is ACTIVE in the ring")
   204  
   205  	return nil
   206  }
   207  
   208  // running implements the Lifecycler interface and is one of the lifecycle hooks.
   209  func (rm *RingManager) running(ctx context.Context) error {
   210  	t := time.NewTicker(ringCheckPeriod)
   211  	defer t.Stop()
   212  	for {
   213  		select {
   214  		case <-ctx.Done():
   215  			return nil
   216  		case err := <-rm.subservicesWatcher.Chan():
   217  			return errors.Wrap(err, "running index gateway ring manager subservice failed")
   218  		case <-t.C:
   219  			continue
   220  		}
   221  	}
   222  }
   223  
   224  // stopping implements the Lifecycler interface and is one of the lifecycle hooks.
   225  func (rm *RingManager) stopping(_ error) error {
   226  	level.Debug(rm.log).Log("msg", "stopping index gateway ring manager")
   227  	return services.StopManagerAndAwaitStopped(context.Background(), rm.subservices)
   228  }
   229  
   230  // IndexGatewayOwnsTenant dictates if a given tenant should be ignored by an IndexGateway or not.
   231  //
   232  // It fallbacks to true so that the IndexGateway will only skip tenants if it is certain of that.
   233  // This implementation relies on the tokens assigned to an IndexGateway instance to define if a tenant
   234  // is assigned or not.
   235  func (rm *RingManager) IndexGatewayOwnsTenant(tenant string) bool {
   236  	if rm.cfg.Mode != RingMode {
   237  		return true
   238  	}
   239  
   240  	if rm.managerMode == ClientMode {
   241  		level.Error(rm.log).Log("msg", "ring manager in client mode doesn't support tenant in boundaries interface")
   242  		return true
   243  	}
   244  
   245  	return loki_util.IsAssignedKey(rm.Ring, rm.RingLifecycler.GetInstanceAddr(), tenant)
   246  }
   247  
   248  // ServeHTTP serves the HTTP route /indexgateway/ring.
   249  func (rm *RingManager) ServeHTTP(w http.ResponseWriter, req *http.Request) {
   250  	if rm.cfg.Mode == RingMode {
   251  		rm.Ring.ServeHTTP(w, req)
   252  	} else {
   253  		_, _ = w.Write([]byte("IndexGateway running with 'useIndexGatewayRing' disabled."))
   254  	}
   255  }