github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/client/writer_mgr.go

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package client

import (
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/m3db/m3/src/cluster/placement"
	xerrors "github.com/m3db/m3/src/x/errors"
	xsync "github.com/m3db/m3/src/x/sync"

	"github.com/uber-go/tally"
	"golang.org/x/sys/cpu"
)

var (
	errInstanceWriterManagerClosed = errors.New("instance writer manager closed")
)

// Reporting interval and histogram bucket parameters for the queue-length metric.
const (
	_queueMetricReportInterval = 10 * time.Second
	_queueMetricBuckets        = 8
	_queueMetricBucketStart    = 64
)

// instanceWriterManager manages instance writers.
type instanceWriterManager interface {
	// AddInstances adds instances.
	AddInstances(instances []placement.Instance) error

	// RemoveInstances removes instances.
	RemoveInstances(instances []placement.Instance) error

	// Write writes a metric payload.
	Write(
		instance placement.Instance,
		shardID uint32,
		payload payloadUnion,
	) error

	// Flush flushes buffered metrics.
	Flush() error

	// Close closes the writer manager.
	Close() error
}

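// writerManagerMetrics holds the metrics emitted by the writer manager.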
type writerManagerMetrics struct {
	instancesAdded      tally.Counter
	instancesRemoved    tally.Counter
	queueLen            tally.Histogram
	dirtyWritersPercent tally.Histogram
}

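// newWriterManagerMetrics constructs the writer manager metrics: queue length
// is recorded on exponential buckets starting at _queueMetricBucketStart, and
// the dirty-writers percentage on linear 5%-wide buckets up to 100%.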
func newWriterManagerMetrics(scope tally.Scope) writerManagerMetrics {
	buckets := append(
		tally.ValueBuckets{0},
		tally.MustMakeExponentialValueBuckets(_queueMetricBucketStart, 2, _queueMetricBuckets)...,
	)

	percentBuckets := append(
		tally.ValueBuckets{0},
		tally.MustMakeLinearValueBuckets(5, 5, 20)...,
	)

	return writerManagerMetrics{
		instancesAdded: scope.Tagged(map[string]string{
			"action": "add",
		}).Counter("instances"),
		instancesRemoved: scope.Tagged(map[string]string{
			"action": "remove",
		}).Counter("instances"),
		queueLen:            scope.Histogram("queue-length", buckets),
		dirtyWritersPercent: scope.Histogram("dirty-writers-percent", percentBuckets),
	}
}

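// writerManager is the instanceWriterManager implementation. It maintains one
// reference-counted writer per instance, keyed by instance ID.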
type writerManager struct {
	sync.RWMutex
	wg      sync.WaitGroup
	doneCh  chan struct{}
	opts    Options
	writers map[string]*refCountedWriter
	closed  bool
	metrics writerManagerMetrics
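	// Cache line padding so the worker pool does not share a cache line with
	// the mutex-protected fields above.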
	_       cpu.CacheLinePad
	pool    xsync.PooledWorkerPool
}

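// newInstanceWriterManager creates a writer manager backed by a pooled worker
// pool, starts the metric reporting loop and, when ForceFlushEvery is set, a
// periodic force-flush loop. A minimal usage sketch, assuming an Options value
// opts and a placement.Instance inst are already configured:
//
//	mgr, err := newInstanceWriterManager(opts)
//	if err != nil {
//		// handle error
//	}
//	_ = mgr.AddInstances([]placement.Instance{inst})
//	// ... mgr.Write(inst, shardID, payload) for each metric ...
//	_ = mgr.Flush()
//	_ = mgr.Close()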
func newInstanceWriterManager(opts Options) (instanceWriterManager, error) {
	wm := &writerManager{
		opts:    opts,
		writers: make(map[string]*refCountedWriter),
		metrics: newWriterManagerMetrics(opts.InstrumentOptions().MetricsScope()),
		doneCh:  make(chan struct{}),
	}

	pool, err := xsync.NewPooledWorkerPool(
		opts.FlushWorkerCount(),
		xsync.NewPooledWorkerPoolOptions().SetKillWorkerProbability(0.05),
	)
	if err != nil {
		return nil, err
	}

	wm.pool = pool
	wm.pool.Init()

	wm.wg.Add(1)
	go wm.reportMetricsLoop()

	if opts.ForceFlushEvery() > 0 {
		wm.wg.Add(1)
		go wm.flushLoop(opts.ForceFlushEvery())
	}

	return wm, nil
}

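// AddInstances ensures a writer exists for every given instance and bumps its
// reference count, so writers shared across placement updates stay open.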
func (mgr *writerManager) AddInstances(instances []placement.Instance) error {
	mgr.Lock()
	defer mgr.Unlock()

	if mgr.closed {
		return errInstanceWriterManagerClosed
	}
	for _, instance := range instances {
		id := instance.ID()
		writer, exists := mgr.writers[id]
		if !exists {
			instrumentOpts := mgr.opts.InstrumentOptions()
			scope := instrumentOpts.MetricsScope()
			opts := mgr.opts.SetInstrumentOptions(instrumentOpts.SetMetricsScope(scope.SubScope("writer")))
			writer = newRefCountedWriter(instance, opts)
			mgr.writers[id] = writer
			mgr.metrics.instancesAdded.Inc(1)
		}
		writer.IncRef()
	}
	return nil
}

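// RemoveInstances decrements the reference count of each instance's writer and
// drops writers whose count reaches zero.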
func (mgr *writerManager) RemoveInstances(instances []placement.Instance) error {
	mgr.Lock()
	defer mgr.Unlock()

	if mgr.closed {
		return errInstanceWriterManagerClosed
	}
	for _, instance := range instances {
		id := instance.ID()
		writer, exists := mgr.writers[id]
		if !exists {
			continue
		}
		if writer.DecRef() == 0 {
			delete(mgr.writers, id)
			mgr.metrics.instancesRemoved.Inc(1)
		}
	}
	return nil
}

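// Write routes the payload to the writer for the given instance and marks the
// writer dirty so the next Flush picks it up.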
func (mgr *writerManager) Write(
	instance placement.Instance,
	shardID uint32,
	payload payloadUnion,
) error {
	mgr.RLock()
	if mgr.closed {
		mgr.RUnlock()
		return errInstanceWriterManagerClosed
	}
	id := instance.ID()
	writer, exists := mgr.writers[id]
	if !exists {
		mgr.RUnlock()
		return fmt.Errorf("writer for instance %s not found", id)
	}
	writer.dirty.Store(true)
	err := writer.Write(shardID, payload)
	mgr.RUnlock()

	return err
}

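// Flush flushes every dirty writer concurrently on the worker pool and
// collects any errors into a single multi-error.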
func (mgr *writerManager) Flush() error {
	mgr.RLock()
	defer mgr.RUnlock()

	if mgr.closed {
		return errInstanceWriterManagerClosed
	}

	var (
		errCh  = make(chan error, 1)
		mErrCh = make(chan xerrors.MultiError, 1)
		wg     sync.WaitGroup
	)

	numDirty := 0
	for _, w := range mgr.writers {
		if !w.dirty.Load() {
			continue
		}
		numDirty++
		w := w
		wg.Add(1)
		mgr.pool.Go(func() {
			defer wg.Done()

			// Clear the dirty flag before flushing so that concurrent writes
			// re-mark the writer for the next flush.
			w.dirty.CAS(true, false)
			if err := w.Flush(); err != nil {
				errCh <- err
			}
		})
	}

	percentInUse := 0.0
	if numDirty > 0 && len(mgr.writers) > 0 {
		percentInUse = 100.0 * (float64(numDirty) / float64(len(mgr.writers)))
	}
	mgr.metrics.dirtyWritersPercent.RecordValue(percentInUse)

	// Drain flush errors into a multi-error while the workers run, then wait
	// for all flushes to finish before reading the combined result.
	go func() {
		multiErr := xerrors.NewMultiError()
		for err := range errCh {
			multiErr = multiErr.Add(err)
		}
		mErrCh <- multiErr
	}()
	wg.Wait()
	close(errCh)

	multiErr := <-mErrCh
	return multiErr.FinalError()
}

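// Close marks the manager closed, closes all writers, and stops the background
// reporting and flush loops, waiting for them to exit.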
func (mgr *writerManager) Close() error {
	mgr.Lock()

	if mgr.closed {
		mgr.Unlock()
		return errInstanceWriterManagerClosed
	}

	mgr.closed = true
	for _, writer := range mgr.writers {
		writer.Close()
	}

	close(mgr.doneCh)
	mgr.Unlock()
	mgr.wg.Wait()

	return nil
}

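// reportMetricsLoop periodically samples writer queue lengths until the
// manager is closed.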
func (mgr *writerManager) reportMetricsLoop() {
	defer mgr.wg.Done()

	ticker := time.NewTicker(_queueMetricReportInterval)
	defer ticker.Stop()

	for {
		select {
		case <-mgr.doneCh:
			return
		case <-ticker.C:
			mgr.reportMetrics()
		}
	}
}

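// reportMetrics records the current queue size of every writer.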
func (mgr *writerManager) reportMetrics() {
	mgr.RLock()
	defer mgr.RUnlock()

	for _, writer := range mgr.writers {
		mgr.metrics.queueLen.RecordValue(float64(writer.QueueSize()))
	}
}

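// flushLoop force-flushes buffered metrics every d until the manager is closed.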
func (mgr *writerManager) flushLoop(d time.Duration) {
	defer mgr.wg.Done()

	ticker := time.NewTicker(d)
	defer ticker.Stop()

	for {
		select {
		case <-mgr.doneCh:
			return
		case <-ticker.C:
			mgr.Flush() //nolint:errcheck
		}
	}
}