github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/server/rawtcp/server.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package rawtcp
    22  
    23  import (
    24  	"bufio"
    25  	"fmt"
    26  	"io"
    27  	"net"
    28  	"sync"
    29  	"time"
    30  
    31  	"github.com/m3db/m3/src/aggregator/aggregator"
    32  	"github.com/m3db/m3/src/aggregator/rate"
    33  	"github.com/m3db/m3/src/metrics/encoding"
    34  	"github.com/m3db/m3/src/metrics/encoding/protobuf"
    35  	"github.com/m3db/m3/src/metrics/metadata"
    36  	"github.com/m3db/m3/src/metrics/metric/aggregated"
    37  	"github.com/m3db/m3/src/metrics/metric/unaggregated"
    38  	"github.com/m3db/m3/src/metrics/policy"
    39  	xio "github.com/m3db/m3/src/x/io"
    40  	xserver "github.com/m3db/m3/src/x/server"
    41  	xtime "github.com/m3db/m3/src/x/time"
    42  
    43  	"github.com/uber-go/tally"
    44  	"go.uber.org/zap"
    45  )
    46  
    47  const (
    48  	unknownRemoteHostAddress = "<unknown>"
    49  )
    50  
    51  // NewServer creates a new raw TCP server.
    52  func NewServer(address string, aggregator aggregator.Aggregator, opts Options) xserver.Server {
    53  	iOpts := opts.InstrumentOptions()
    54  	handlerScope := iOpts.MetricsScope().Tagged(map[string]string{"handler": "rawtcp"})
    55  	handler := NewHandler(aggregator, opts.SetInstrumentOptions(iOpts.SetMetricsScope(handlerScope)))
    56  	return xserver.NewServer(address, handler, opts.ServerOptions())
    57  }
    58  
    59  type handlerMetrics struct {
    60  	unknownMessageTypeErrors tally.Counter
    61  	addUntimedErrors         tally.Counter
    62  	addTimedErrors           tally.Counter
    63  	addForwardedErrors       tally.Counter
    64  	addPassthroughErrors     tally.Counter
    65  	unknownErrorTypeErrors   tally.Counter
    66  	decodeErrors             tally.Counter
    67  	errLogRateLimited        tally.Counter
    68  }
    69  
    70  func newHandlerMetrics(scope tally.Scope) handlerMetrics {
    71  	return handlerMetrics{
    72  		unknownMessageTypeErrors: scope.Counter("unknown-message-type-errors"),
    73  		addUntimedErrors:         scope.Counter("add-untimed-errors"),
    74  		addTimedErrors:           scope.Counter("add-timed-errors"),
    75  		addForwardedErrors:       scope.Counter("add-forwarded-errors"),
    76  		addPassthroughErrors:     scope.Counter("add-passthrough-errors"),
    77  		unknownErrorTypeErrors:   scope.Counter("unknown-error-type-errors"),
    78  		decodeErrors:             scope.Counter("decode-errors"),
    79  		errLogRateLimited:        scope.Counter("error-log-rate-limited"),
    80  	}
    81  }
    82  
    83  type handler struct {
    84  	sync.Mutex
    85  
    86  	aggregator     aggregator.Aggregator
    87  	log            *zap.Logger
    88  	readBufferSize int
    89  	protobufItOpts protobuf.UnaggregatedOptions
    90  
    91  	errLogRateLimiter *rate.Limiter
    92  	metrics           handlerMetrics
    93  
    94  	opts Options
    95  }
    96  
    97  // NewHandler creates a new raw TCP handler.
    98  func NewHandler(aggregator aggregator.Aggregator, opts Options) xserver.Handler {
    99  	iOpts := opts.InstrumentOptions()
   100  	var limiter *rate.Limiter
   101  	if rateLimit := opts.ErrorLogLimitPerSecond(); rateLimit != 0 {
   102  		limiter = rate.NewLimiter(rateLimit)
   103  	}
   104  	return &handler{
   105  		aggregator:        aggregator,
   106  		log:               iOpts.Logger(),
   107  		readBufferSize:    opts.ReadBufferSize(),
   108  		protobufItOpts:    opts.ProtobufUnaggregatedIteratorOptions(),
   109  		errLogRateLimiter: limiter,
   110  		metrics:           newHandlerMetrics(iOpts.MetricsScope()),
   111  		opts:              opts,
   112  	}
   113  }
   114  
   115  func (s *handler) Handle(conn net.Conn) {
   116  	remoteAddress := unknownRemoteHostAddress
   117  	if remoteAddr := conn.RemoteAddr(); remoteAddr != nil {
   118  		remoteAddress = remoteAddr.String()
   119  	}
   120  
   121  	nowFn := s.opts.ClockOptions().NowFn()
   122  	rOpts := xio.ResettableReaderOptions{ReadBufferSize: s.readBufferSize}
   123  	read := s.opts.RWOptions().ResettableReaderFn()(conn, rOpts)
   124  	reader := bufio.NewReaderSize(read, s.readBufferSize)
   125  	it := protobuf.NewUnaggregatedIterator(reader, s.protobufItOpts)
   126  	defer it.Close()
   127  
   128  	// Iterate over the incoming metrics stream and queue up metrics.
   129  	var (
   130  		untimedMetric       unaggregated.MetricUnion
   131  		stagedMetadatas     metadata.StagedMetadatas
   132  		forwardedMetric     aggregated.ForwardedMetric
   133  		forwardMetadata     metadata.ForwardMetadata
   134  		timedMetric         aggregated.Metric
   135  		timedMetadata       metadata.TimedMetadata
   136  		passthroughMetric   aggregated.Metric
   137  		passthroughMetadata policy.StoragePolicy
   138  		err                 error
   139  	)
   140  	for it.Next() {
   141  		current := it.Current()
   142  		switch current.Type {
   143  		case encoding.CounterWithMetadatasType:
   144  			untimedMetric = current.CounterWithMetadatas.Counter.ToUnion()
   145  			untimedMetric.Annotation = current.CounterWithMetadatas.Annotation
   146  			stagedMetadatas = current.CounterWithMetadatas.StagedMetadatas
   147  			err = s.aggregator.AddUntimed(untimedMetric, stagedMetadatas)
   148  		case encoding.BatchTimerWithMetadatasType:
   149  			untimedMetric = current.BatchTimerWithMetadatas.BatchTimer.ToUnion()
   150  			untimedMetric.Annotation = current.BatchTimerWithMetadatas.Annotation
   151  			stagedMetadatas = current.BatchTimerWithMetadatas.StagedMetadatas
   152  			err = s.aggregator.AddUntimed(untimedMetric, stagedMetadatas)
   153  		case encoding.GaugeWithMetadatasType:
   154  			untimedMetric = current.GaugeWithMetadatas.Gauge.ToUnion()
   155  			untimedMetric.Annotation = current.GaugeWithMetadatas.Annotation
   156  			stagedMetadatas = current.GaugeWithMetadatas.StagedMetadatas
   157  			err = s.aggregator.AddUntimed(untimedMetric, stagedMetadatas)
   158  		case encoding.ForwardedMetricWithMetadataType:
   159  			forwardedMetric = current.ForwardedMetricWithMetadata.ForwardedMetric
   160  			untimedMetric.Annotation = current.ForwardedMetricWithMetadata.Annotation
   161  			forwardMetadata = current.ForwardedMetricWithMetadata.ForwardMetadata
   162  			err = s.aggregator.AddForwarded(forwardedMetric, forwardMetadata)
   163  		case encoding.TimedMetricWithMetadataType:
   164  			timedMetric = current.TimedMetricWithMetadata.Metric
   165  			timedMetric.Annotation = current.TimedMetricWithMetadata.Annotation
   166  			timedMetadata = current.TimedMetricWithMetadata.TimedMetadata
   167  			err = s.aggregator.AddTimed(timedMetric, timedMetadata)
   168  		case encoding.TimedMetricWithMetadatasType:
   169  			timedMetric = current.TimedMetricWithMetadatas.Metric
   170  			timedMetric.Annotation = current.TimedMetricWithMetadatas.Annotation
   171  			stagedMetadatas = current.TimedMetricWithMetadatas.StagedMetadatas
   172  			err = s.aggregator.AddTimedWithStagedMetadatas(timedMetric, stagedMetadatas)
   173  		case encoding.PassthroughMetricWithMetadataType:
   174  			passthroughMetric = current.PassthroughMetricWithMetadata.Metric
   175  			passthroughMetric.Annotation = current.PassthroughMetricWithMetadata.Annotation
   176  			passthroughMetadata = current.PassthroughMetricWithMetadata.StoragePolicy
   177  			err = s.aggregator.AddPassthrough(passthroughMetric, passthroughMetadata)
   178  		default:
   179  			err = newUnknownMessageTypeError(current.Type)
   180  		}
   181  
   182  		if err == nil {
   183  			continue
   184  		}
   185  
   186  		// We rate limit the error log here because the error rate may scale with
   187  		// the metrics incoming rate and consume lots of cpu cycles.
   188  		if s.errLogRateLimiter != nil && !s.errLogRateLimiter.IsAllowed(1, xtime.ToUnixNano(nowFn())) {
   189  			s.metrics.errLogRateLimited.Inc(1)
   190  			continue
   191  		}
   192  		switch err.(type) {
   193  		case unknownMessageTypeError:
   194  			s.metrics.unknownMessageTypeErrors.Inc(1)
   195  			s.log.Error("unexpected message type",
   196  				zap.String("remoteAddress", remoteAddress),
   197  				zap.Error(err),
   198  			)
   199  		default:
   200  			switch current.Type {
   201  			case encoding.CounterWithMetadatasType:
   202  				fallthrough
   203  			case encoding.BatchTimerWithMetadatasType:
   204  				fallthrough
   205  			case encoding.GaugeWithMetadatasType:
   206  				s.metrics.addUntimedErrors.Inc(1)
   207  				s.log.Error("error adding untimed metric",
   208  					zap.String("remoteAddress", remoteAddress),
   209  					zap.Stringer("type", untimedMetric.Type),
   210  					zap.Stringer("id", untimedMetric.ID),
   211  					zap.Any("metadatas", stagedMetadatas),
   212  					zap.Error(err),
   213  				)
   214  			case encoding.ForwardedMetricWithMetadataType:
   215  				s.metrics.addForwardedErrors.Inc(1)
   216  				s.log.Error("error adding forwarded metric",
   217  					zap.String("remoteAddress", remoteAddress),
   218  					zap.Stringer("id", forwardedMetric.ID),
   219  					zap.Time("timestamp", time.Unix(0, forwardedMetric.TimeNanos)),
   220  					zap.Float64s("values", forwardedMetric.Values),
   221  					zap.Error(err),
   222  				)
   223  			case encoding.TimedMetricWithMetadataType:
   224  				fallthrough
   225  			case encoding.TimedMetricWithMetadatasType:
   226  				s.metrics.addTimedErrors.Inc(1)
   227  				s.log.Error("error adding timed metric",
   228  					zap.String("remoteAddress", remoteAddress),
   229  					zap.Stringer("id", timedMetric.ID),
   230  					zap.Time("timestamp", time.Unix(0, timedMetric.TimeNanos)),
   231  					zap.Float64("value", timedMetric.Value),
   232  					zap.Any("metadatas", stagedMetadatas),
   233  					zap.Error(err),
   234  				)
   235  			case encoding.PassthroughMetricWithMetadataType:
   236  				s.metrics.addPassthroughErrors.Inc(1)
   237  				s.log.Error("error adding passthrough metric",
   238  					zap.String("remoteAddress", remoteAddress),
   239  					zap.Stringer("id", timedMetric.ID),
   240  					zap.Time("timestamp", time.Unix(0, timedMetric.TimeNanos)),
   241  					zap.Float64("value", timedMetric.Value),
   242  					zap.Error(err),
   243  				)
   244  			default:
   245  				// make the linter happy.
   246  				s.log.Error("unknown message type for error. this cannot happen")
   247  			}
   248  		}
   249  	}
   250  
   251  	// If there is an error during decoding, it's likely due to a broken connection
   252  	// and therefore we ignore the EOF error.
   253  	if err := it.Err(); err != nil && err != io.EOF {
   254  		s.log.Error("decode error",
   255  			zap.String("remoteAddress", remoteAddress),
   256  			zap.Error(err),
   257  		)
   258  		s.metrics.decodeErrors.Inc(1)
   259  	}
   260  }
   261  
   262  func (s *handler) Close() {
   263  	// NB(cw) Do not close s.aggregator here because it's shared between
   264  	// the raw TCP server and the http server, and it will be closed on
   265  	// exit signal.
   266  }
   267  
   268  type unknownMessageTypeError struct {
   269  	msgType encoding.UnaggregatedMessageType
   270  }
   271  
   272  func newUnknownMessageTypeError(
   273  	msgType encoding.UnaggregatedMessageType,
   274  ) unknownMessageTypeError {
   275  	return unknownMessageTypeError{msgType: msgType}
   276  }
   277  
   278  func (e unknownMessageTypeError) Error() string {
   279  	return fmt.Sprintf("unknown message type %v", e.msgType)
   280  }