github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/client/writer.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package client
    22  
    23  import (
    24  	"errors"
    25  	"sync"
    26  
    27  	"github.com/m3db/m3/src/cluster/placement"
    28  	"github.com/m3db/m3/src/metrics/encoding"
    29  	"github.com/m3db/m3/src/metrics/encoding/protobuf"
    30  	"github.com/m3db/m3/src/metrics/metadata"
    31  	"github.com/m3db/m3/src/metrics/metric"
    32  	"github.com/m3db/m3/src/metrics/metric/aggregated"
    33  	"github.com/m3db/m3/src/metrics/metric/unaggregated"
    34  	"github.com/m3db/m3/src/metrics/policy"
    35  	xerrors "github.com/m3db/m3/src/x/errors"
    36  
    37  	"github.com/uber-go/tally"
    38  	"go.uber.org/atomic"
    39  	"go.uber.org/zap"
    40  )
    41  
// Sentinel errors returned by the instance writer in this file:
//   - errInstanceWriterClosed: Write, Flush, or Close was called on a writer
//     that has already been closed.
//   - errUnrecognizedMetricType: an untimed metric union is neither a counter,
//     a timer, nor a gauge.
//   - errUnrecognizedPayloadType: a payload's type matches none of the payload
//     types handled by encodeWithLock.
var (
	errInstanceWriterClosed    = errors.New("instance writer is closed")
	errUnrecognizedMetricType  = errors.New("unrecognized metric type")
	errUnrecognizedPayloadType = errors.New("unrecognized payload type")
)
    47  
// instanceWriter is the set of operations used to write metric payloads to a
// single instance. In this file it is implemented by *writer, which is what
// newInstanceWriter returns.
type instanceWriter interface {
	// Write writes a metric payload for a given shard.
	Write(shard uint32, payload payloadUnion) error

	// Flush flushes any buffered metrics.
	Flush() error

	// QueueSize returns the size of the instance queue.
	QueueSize() int

	// Close closes the writer.
	Close() error
}
    61  
// newLockedEncoderFn constructs a lockedEncoder from the given encoder
// options. The writer stores one as a field (newInstanceWriter sets it to
// newLockedEncoder) and calls it the first time a shard is written to.
type newLockedEncoderFn func(protobuf.UnaggregatedOptions) *lockedEncoder
    63  
// writer is the instanceWriter implementation in this file. It keeps one
// lockedEncoder per shard in encodersByShard, encodes payloads into them, and
// hands the encoded buffers to queue once an encoder's length reaches
// maxBatchSize, or when the writer is flushed or closed.
//
// The embedded RWMutex guards closed and the encodersByShard map; each
// encoder additionally carries its own mutex. maxTimerBatchSize limits the
// number of timer values encoded per batch timer message (zero means no
// limit). newLockedEncoderFn creates the encoder for a shard on first use.
type writer struct {
	metrics            writerMetrics
	encoderOpts        protobuf.UnaggregatedOptions
	queue              instanceQueue
	log                *zap.Logger
	encodersByShard    map[uint32]*lockedEncoder
	newLockedEncoderFn newLockedEncoderFn
	maxTimerBatchSize  int
	maxBatchSize       int
	sync.RWMutex
	closed bool
}
    76  
    77  func newInstanceWriter(instance placement.Instance, opts Options) instanceWriter {
    78  	var (
    79  		iOpts     = opts.InstrumentOptions()
    80  		scope     = iOpts.MetricsScope()
    81  		queueOpts = opts.SetInstrumentOptions(iOpts.SetMetricsScope(scope.SubScope("queue")))
    82  	)
    83  	w := &writer{
    84  		log:               iOpts.Logger(),
    85  		metrics:           newWriterMetrics(scope),
    86  		maxBatchSize:      opts.MaxBatchSize(),
    87  		maxTimerBatchSize: opts.MaxTimerBatchSize(),
    88  		encoderOpts:       opts.EncoderOptions(),
    89  		queue:             newInstanceQueue(instance, queueOpts),
    90  		encodersByShard:   make(map[uint32]*lockedEncoder),
    91  	}
    92  	w.newLockedEncoderFn = newLockedEncoder
    93  	return w
    94  }
    95  
    96  func (w *writer) Write(shard uint32, payload payloadUnion) error {
    97  	w.RLock()
    98  	if w.closed {
    99  		w.RUnlock()
   100  		return errInstanceWriterClosed
   101  	}
   102  	encoder, exists := w.encodersByShard[shard]
   103  	if exists {
   104  		err := w.encodeWithLock(encoder, payload)
   105  		w.RUnlock()
   106  		return err
   107  	}
   108  	w.RUnlock()
   109  
   110  	w.Lock()
   111  	if w.closed {
   112  		w.Unlock()
   113  		return errInstanceWriterClosed
   114  	}
   115  	encoder, exists = w.encodersByShard[shard]
   116  	if exists {
   117  		err := w.encodeWithLock(encoder, payload)
   118  		w.Unlock()
   119  		return err
   120  	}
   121  	encoder = w.newLockedEncoderFn(w.encoderOpts)
   122  	w.encodersByShard[shard] = encoder
   123  	err := w.encodeWithLock(encoder, payload)
   124  	w.Unlock()
   125  
   126  	return err
   127  }
   128  
   129  func (w *writer) Flush() error {
   130  	w.RLock()
   131  	if w.closed {
   132  		w.RUnlock()
   133  		return errInstanceWriterClosed
   134  	}
   135  	err := w.flushWithLock()
   136  	w.RUnlock()
   137  
   138  	if err != nil {
   139  		w.metrics.flushErrors.Inc(1)
   140  		return err
   141  	}
   142  	return nil
   143  }
   144  
   145  func (w *writer) Close() error {
   146  	w.Lock()
   147  	defer w.Unlock()
   148  
   149  	if w.closed {
   150  		return errInstanceWriterClosed
   151  	}
   152  	w.closed = true
   153  	if err := w.flushWithLock(); err != nil {
   154  		w.metrics.flushErrors.Inc(1)
   155  	}
   156  	return w.queue.Close()
   157  }
   158  
   159  func (w *writer) QueueSize() int {
   160  	return w.queue.Size()
   161  }
   162  
   163  func (w *writer) encodeWithLock(
   164  	encoder *lockedEncoder,
   165  	payload payloadUnion,
   166  ) error {
   167  	encoder.Lock()
   168  
   169  	var (
   170  		sizeBefore = encoder.Len()
   171  		err        error
   172  	)
   173  
   174  	switch payload.payloadType {
   175  	case untimedType:
   176  		err = w.encodeUntimedWithLock(encoder, payload.untimed.metric, payload.untimed.metadatas)
   177  	case forwardedType:
   178  		err = w.encodeForwardedWithLock(encoder, payload.forwarded.metric, payload.forwarded.metadata)
   179  	case timedType:
   180  		err = w.encodeTimedWithLock(encoder, payload.timed.metric, payload.timed.metadata)
   181  	case timedWithStagedMetadatasType:
   182  		elem := payload.timedWithStagedMetadatas
   183  		err = w.encodeTimedWithStagedMetadatasWithLock(encoder, elem.metric, elem.metadatas)
   184  	case passthroughType:
   185  		err = w.encodePassthroughWithLock(encoder, payload.passthrough.metric, payload.passthrough.storagePolicy)
   186  	default:
   187  		err = errUnrecognizedPayloadType
   188  	}
   189  
   190  	if err != nil {
   191  		w.metrics.encodeErrors.Inc(1)
   192  		w.log.Error("encode untimed metric error",
   193  			zap.Any("payload", payload),
   194  			zap.Int("payloadType", int(payload.payloadType)),
   195  			zap.Error(err),
   196  		)
   197  		// Rewind buffer and clear out the encoder error.
   198  		encoder.Truncate(sizeBefore) //nolint:errcheck
   199  		encoder.Unlock()
   200  		return err
   201  	}
   202  
   203  	if encoder.Len() < w.maxBatchSize {
   204  		encoder.Unlock()
   205  		return nil
   206  	}
   207  
   208  	buffer := encoder.Relinquish()
   209  	encoder.Unlock()
   210  
   211  	return w.enqueueBuffer(buffer)
   212  }
   213  
   214  func (w *writer) encodeUntimedWithLock(
   215  	encoder *lockedEncoder,
   216  	metricUnion unaggregated.MetricUnion,
   217  	metadatas metadata.StagedMetadatas,
   218  ) error {
   219  	switch metricUnion.Type {
   220  	case metric.CounterType:
   221  		msg := encoding.UnaggregatedMessageUnion{
   222  			Type: encoding.CounterWithMetadatasType,
   223  			CounterWithMetadatas: unaggregated.CounterWithMetadatas{
   224  				Counter:         metricUnion.Counter(),
   225  				StagedMetadatas: metadatas,
   226  			}}
   227  
   228  		return encoder.EncodeMessage(msg)
   229  	case metric.TimerType:
   230  		// If there is no limit on the timer batch size, write the full batch.
   231  		if w.maxTimerBatchSize == 0 {
   232  			msg := encoding.UnaggregatedMessageUnion{
   233  				Type: encoding.BatchTimerWithMetadatasType,
   234  				BatchTimerWithMetadatas: unaggregated.BatchTimerWithMetadatas{
   235  					BatchTimer:      metricUnion.BatchTimer(),
   236  					StagedMetadatas: metadatas,
   237  				}}
   238  
   239  			return encoder.EncodeMessage(msg)
   240  		}
   241  
   242  		// Otherwise, honor maximum timer batch size.
   243  		var (
   244  			batchTimer     = metricUnion.BatchTimer()
   245  			timerValues    = batchTimer.Values
   246  			numTimerValues = len(timerValues)
   247  			start, end     int
   248  		)
   249  
   250  		for start = 0; start < numTimerValues; start = end {
   251  			end = start + w.maxTimerBatchSize
   252  			if end > numTimerValues {
   253  				end = numTimerValues
   254  			}
   255  			singleBatchTimer := unaggregated.BatchTimer{
   256  				ID:         batchTimer.ID,
   257  				Values:     timerValues[start:end],
   258  				Annotation: metricUnion.Annotation,
   259  			}
   260  			msg := encoding.UnaggregatedMessageUnion{
   261  				Type: encoding.BatchTimerWithMetadatasType,
   262  				BatchTimerWithMetadatas: unaggregated.BatchTimerWithMetadatas{
   263  					BatchTimer:      singleBatchTimer,
   264  					StagedMetadatas: metadatas,
   265  				}}
   266  			if err := encoder.EncodeMessage(msg); err != nil {
   267  				return err
   268  			}
   269  
   270  			// Unlock the encoder before we encode another metric to ensure other
   271  			// goroutines have an opportunity to encode metrics while larger timer
   272  			// batches are being encoded.
   273  			if end < numTimerValues {
   274  				encoder.Unlock()
   275  				encoder.Lock()
   276  			}
   277  		}
   278  
   279  		return nil
   280  	case metric.GaugeType:
   281  		msg := encoding.UnaggregatedMessageUnion{
   282  			Type: encoding.GaugeWithMetadatasType,
   283  			GaugeWithMetadatas: unaggregated.GaugeWithMetadatas{
   284  				Gauge:           metricUnion.Gauge(),
   285  				StagedMetadatas: metadatas,
   286  			}}
   287  		return encoder.EncodeMessage(msg)
   288  	default:
   289  	}
   290  
   291  	return errUnrecognizedMetricType
   292  }
   293  
   294  func (w *writer) encodeForwardedWithLock(
   295  	encoder *lockedEncoder,
   296  	metric aggregated.ForwardedMetric,
   297  	metadata metadata.ForwardMetadata,
   298  ) error {
   299  	msg := encoding.UnaggregatedMessageUnion{
   300  		Type: encoding.ForwardedMetricWithMetadataType,
   301  		ForwardedMetricWithMetadata: aggregated.ForwardedMetricWithMetadata{
   302  			ForwardedMetric: metric,
   303  			ForwardMetadata: metadata,
   304  		}}
   305  
   306  	return encoder.EncodeMessage(msg)
   307  }
   308  
   309  func (w *writer) encodeTimedWithLock(
   310  	encoder *lockedEncoder,
   311  	metric aggregated.Metric,
   312  	metadata metadata.TimedMetadata,
   313  ) error {
   314  	msg := encoding.UnaggregatedMessageUnion{
   315  		Type: encoding.TimedMetricWithMetadataType,
   316  		TimedMetricWithMetadata: aggregated.TimedMetricWithMetadata{
   317  			Metric:        metric,
   318  			TimedMetadata: metadata,
   319  		}}
   320  
   321  	return encoder.EncodeMessage(msg)
   322  }
   323  
   324  func (w *writer) encodeTimedWithStagedMetadatasWithLock(
   325  	encoder *lockedEncoder,
   326  	metric aggregated.Metric,
   327  	metadatas metadata.StagedMetadatas,
   328  ) error {
   329  	msg := encoding.UnaggregatedMessageUnion{
   330  		Type: encoding.TimedMetricWithMetadatasType,
   331  		TimedMetricWithMetadatas: aggregated.TimedMetricWithMetadatas{
   332  			Metric:          metric,
   333  			StagedMetadatas: metadatas,
   334  		}}
   335  
   336  	return encoder.EncodeMessage(msg)
   337  }
   338  
   339  func (w *writer) encodePassthroughWithLock(
   340  	encoder *lockedEncoder,
   341  	metric aggregated.Metric,
   342  	storagePolicy policy.StoragePolicy,
   343  ) error {
   344  	msg := encoding.UnaggregatedMessageUnion{
   345  		Type: encoding.PassthroughMetricWithMetadataType,
   346  		PassthroughMetricWithMetadata: aggregated.PassthroughMetricWithMetadata{
   347  			Metric:        metric,
   348  			StoragePolicy: storagePolicy,
   349  		}}
   350  
   351  	return encoder.EncodeMessage(msg)
   352  }
   353  
   354  func (w *writer) flushWithLock() error {
   355  	multiErr := xerrors.NewMultiError()
   356  	for _, encoder := range w.encodersByShard {
   357  		encoder.Lock()
   358  		if encoder.Len() == 0 {
   359  			encoder.Unlock()
   360  			continue
   361  		}
   362  		buffer := encoder.Relinquish()
   363  		encoder.Unlock()
   364  		if err := w.enqueueBuffer(buffer); err != nil {
   365  			multiErr = multiErr.Add(err)
   366  		}
   367  	}
   368  
   369  	w.queue.Flush()
   370  
   371  	return multiErr.FinalError()
   372  }
   373  
   374  func (w *writer) enqueueBuffer(buf protobuf.Buffer) error {
   375  	if err := w.queue.Enqueue(buf); err != nil {
   376  		w.metrics.enqueueErrors.Inc(1)
   377  		return err
   378  	}
   379  	w.metrics.buffersEnqueued.Inc(1)
   380  	return nil
   381  }
   382  
// Metric name and tag key used for the writer's counters: every counter
// created by newWriterMetrics is named buffersMetric and is distinguished by
// the value of its actionTag tag.
const (
	buffersMetric = "buffers"
	actionTag     = "action"
)
   387  
// writerMetrics groups the counters emitted by a writer:
//   - buffersEnqueued: buffers successfully handed to the instance queue.
//   - encodeErrors: payloads that failed to encode.
//   - enqueueErrors: buffers for which the instance queue returned an error.
//   - flushErrors: Flush or Close calls whose flush reported an error.
type writerMetrics struct {
	buffersEnqueued tally.Counter
	encodeErrors    tally.Counter
	enqueueErrors   tally.Counter
	flushErrors     tally.Counter
}
   394  
   395  func newWriterMetrics(s tally.Scope) writerMetrics {
   396  	return writerMetrics{
   397  		buffersEnqueued: s.Tagged(map[string]string{actionTag: "enqueued"}).Counter(buffersMetric),
   398  		encodeErrors:    s.Tagged(map[string]string{actionTag: "encode-error"}).Counter(buffersMetric),
   399  		enqueueErrors:   s.Tagged(map[string]string{actionTag: "enqueue-error"}).Counter(buffersMetric),
   400  		flushErrors:     s.Tagged(map[string]string{actionTag: "flush-error"}).Counter(buffersMetric),
   401  	}
   402  }
   403  
// lockedEncoder pairs an unaggregated protobuf encoder with a mutex. The
// code in this file locks the mutex explicitly around its use of the embedded
// encoder.
type lockedEncoder struct {
	protobuf.UnaggregatedEncoder
	sync.Mutex
}
   408  
   409  func newLockedEncoder(encoderOpts protobuf.UnaggregatedOptions) *lockedEncoder {
   410  	encoder := protobuf.NewUnaggregatedEncoder(encoderOpts)
   411  	return &lockedEncoder{UnaggregatedEncoder: encoder}
   412  }
   413  
// refCountedWriter wraps an instanceWriter with a reference count and a dirty
// flag. newRefCountedWriter registers the wrapper's Close method as the
// reference count's destructor.
// NOTE(review): refCount is declared outside the code visible here;
// presumably the destructor runs when the count drops to zero — confirm.
// The dirty flag is neither read nor written in the code visible here.
type refCountedWriter struct {
	instanceWriter
	refCount
	dirty atomic.Bool
}
   419  
   420  func newRefCountedWriter(instance placement.Instance, opts Options) *refCountedWriter {
   421  	rcWriter := &refCountedWriter{instanceWriter: newInstanceWriter(instance, opts)}
   422  	rcWriter.refCount.SetDestructor(rcWriter.Close)
   423  	return rcWriter
   424  }
   425  
   426  func (rcWriter *refCountedWriter) Close() {
   427  	// NB: closing the writer needs to be done asynchronously because it may
   428  	// be called by writer manager while holding a lock that blocks any writes
   429  	// from proceeding.
   430  	go rcWriter.instanceWriter.Close() // nolint: errcheck
   431  }