github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/alertmanager_client.go (about)

     1  package alertmanager
     2  
     3  import (
     4  	"flag"
     5  	"time"
     6  
     7  	"github.com/go-kit/log"
     8  	"github.com/grafana/dskit/crypto/tls"
     9  	"github.com/grafana/dskit/grpcclient"
    10  	"github.com/grafana/dskit/ring/client"
    11  	"github.com/pkg/errors"
    12  	"github.com/prometheus/client_golang/prometheus"
    13  	"github.com/prometheus/client_golang/prometheus/promauto"
    14  	"google.golang.org/grpc"
    15  	"google.golang.org/grpc/health/grpc_health_v1"
    16  
    17  	"github.com/cortexproject/cortex/pkg/alertmanager/alertmanagerpb"
    18  )
    19  
    20  // ClientsPool is the interface used to get the client from the pool for a specified address.
    21  type ClientsPool interface {
    22  	// GetClientFor returns the alertmanager client for the given address.
    23  	GetClientFor(addr string) (Client, error)
    24  }
    25  
    26  // Client is the interface that should be implemented by any client used to read/write data to an alertmanager via GRPC.
    27  type Client interface {
    28  	alertmanagerpb.AlertmanagerClient
    29  
    30  	// RemoteAddress returns the address of the remote alertmanager and is used to uniquely
    31  	// identify an alertmanager instance.
    32  	RemoteAddress() string
    33  }
    34  
    35  // ClientConfig is the configuration struct for the alertmanager client.
    36  type ClientConfig struct {
    37  	RemoteTimeout time.Duration    `yaml:"remote_timeout"`
    38  	TLSEnabled    bool             `yaml:"tls_enabled"`
    39  	TLS           tls.ClientConfig `yaml:",inline"`
    40  }
    41  
    42  // RegisterFlagsWithPrefix registers flags with prefix.
    43  func (cfg *ClientConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
    44  	f.BoolVar(&cfg.TLSEnabled, prefix+".tls-enabled", cfg.TLSEnabled, "Enable TLS in the GRPC client. This flag needs to be enabled when any other TLS flag is set. If set to false, insecure connection to gRPC server will be used.")
    45  	f.DurationVar(&cfg.RemoteTimeout, prefix+".remote-timeout", 2*time.Second, "Timeout for downstream alertmanagers.")
    46  	cfg.TLS.RegisterFlagsWithPrefix(prefix, f)
    47  }
    48  
    49  type alertmanagerClientsPool struct {
    50  	pool *client.Pool
    51  }
    52  
    53  func newAlertmanagerClientsPool(discovery client.PoolServiceDiscovery, amClientCfg ClientConfig, logger log.Logger, reg prometheus.Registerer) ClientsPool {
    54  	// We prefer sane defaults instead of exposing further config options.
    55  	grpcCfg := grpcclient.Config{
    56  		MaxRecvMsgSize:      16 * 1024 * 1024,
    57  		MaxSendMsgSize:      4 * 1024 * 1024,
    58  		GRPCCompression:     "",
    59  		RateLimit:           0,
    60  		RateLimitBurst:      0,
    61  		BackoffOnRatelimits: false,
    62  		TLSEnabled:          amClientCfg.TLSEnabled,
    63  		TLS:                 amClientCfg.TLS,
    64  	}
    65  
    66  	requestDuration := promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
    67  		Name:    "cortex_alertmanager_distributor_client_request_duration_seconds",
    68  		Help:    "Time spent executing requests from an alertmanager to another alertmanager.",
    69  		Buckets: prometheus.ExponentialBuckets(0.008, 4, 7),
    70  	}, []string{"operation", "status_code"})
    71  
    72  	factory := func(addr string) (client.PoolClient, error) {
    73  		return dialAlertmanagerClient(grpcCfg, addr, requestDuration)
    74  	}
    75  
    76  	poolCfg := client.PoolConfig{
    77  		CheckInterval:      time.Minute,
    78  		HealthCheckEnabled: true,
    79  		HealthCheckTimeout: 10 * time.Second,
    80  	}
    81  
    82  	clientsCount := promauto.With(reg).NewGauge(prometheus.GaugeOpts{
    83  		Namespace: "cortex",
    84  		Name:      "alertmanager_distributor_clients",
    85  		Help:      "The current number of alertmanager distributor clients in the pool.",
    86  	})
    87  
    88  	return &alertmanagerClientsPool{pool: client.NewPool("alertmanager", poolCfg, discovery, factory, clientsCount, logger)}
    89  }
    90  
    91  func (f *alertmanagerClientsPool) GetClientFor(addr string) (Client, error) {
    92  	c, err := f.pool.GetClientFor(addr)
    93  	if err != nil {
    94  		return nil, err
    95  	}
    96  	return c.(Client), nil
    97  }
    98  
    99  func dialAlertmanagerClient(cfg grpcclient.Config, addr string, requestDuration *prometheus.HistogramVec) (*alertmanagerClient, error) {
   100  	opts, err := cfg.DialOption(grpcclient.Instrument(requestDuration))
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  	conn, err := grpc.Dial(addr, opts...)
   105  	if err != nil {
   106  		return nil, errors.Wrapf(err, "failed to dial alertmanager %s", addr)
   107  	}
   108  
   109  	return &alertmanagerClient{
   110  		AlertmanagerClient: alertmanagerpb.NewAlertmanagerClient(conn),
   111  		HealthClient:       grpc_health_v1.NewHealthClient(conn),
   112  		conn:               conn,
   113  	}, nil
   114  }
   115  
   116  type alertmanagerClient struct {
   117  	alertmanagerpb.AlertmanagerClient
   118  	grpc_health_v1.HealthClient
   119  	conn *grpc.ClientConn
   120  }
   121  
   122  func (c *alertmanagerClient) Close() error {
   123  	return c.conn.Close()
   124  }
   125  
   126  func (c *alertmanagerClient) String() string {
   127  	return c.RemoteAddress()
   128  }
   129  
   130  func (c *alertmanagerClient) RemoteAddress() string {
   131  	return c.conn.Target()
   132  }