github.com/m3db/m3@v1.5.0/src/msg/producer/writer/shard_writer.go

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package writer

import (
	"sync"

	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/msg/producer"

	"go.uber.org/atomic"
	"go.uber.org/zap"
)

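// shardWriter writes messages to the instances that own a shard.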
type shardWriter interface {
	// Write writes the reference counted message. Implementations must be
	// thread safe.
	Write(rm *producer.RefCountedMessage)

	// UpdateInstances updates the instances responsible for this shard.
	UpdateInstances(
		instances []placement.Instance,
		cws map[string]consumerWriter,
	)

	// SetMessageTTLNanos sets the message TTL in nanoseconds.
	SetMessageTTLNanos(value int64)

	// Close closes the shard writer.
	Close()

	// QueueSize returns the number of messages queued for the shard.
	QueueSize() int
}

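// sharedShardWriter writes messages for a shard through a single message
// writer that is shared by all of the instances that own the shard.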
type sharedShardWriter struct {
	instances map[string]struct{}
	mw        messageWriter
	isClosed  *atomic.Bool
}

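// newSharedShardWriter creates a shard writer that registers a single message
// writer for the shard on the ack router and shares it across all instances
// that own the shard.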
func newSharedShardWriter(
	shard uint32,
	router ackRouter,
	mPool messagePool,
	opts Options,
	m messageWriterMetrics,
) shardWriter {
	replicatedShardID := uint64(shard)
	mw := newMessageWriter(replicatedShardID, mPool, opts, m)
	mw.Init()
	router.Register(replicatedShardID, mw)
	return &sharedShardWriter{
		instances: make(map[string]struct{}),
		mw:        mw,
		isClosed:  atomic.NewBool(false),
	}
}

func (w *sharedShardWriter) Write(rm *producer.RefCountedMessage) {
	w.mw.Write(rm)
}

// UpdateInstances is not thread safe and must only be called from a single
// goroutine.
func (w *sharedShardWriter) UpdateInstances(
	instances []placement.Instance,
	cws map[string]consumerWriter,
) {
	var (
		newInstancesMap = make(map[string]struct{}, len(instances))
		toBeDeleted     = w.instances
	)
	for _, instance := range instances {
		id := instance.Endpoint()
		newInstancesMap[id] = struct{}{}
		if _, ok := toBeDeleted[id]; ok {
			// Existing instance.
			delete(toBeDeleted, id)
			continue
		}
		// Add the consumer writer to the message writer.
		w.mw.AddConsumerWriter(cws[id])
	}
	for id := range toBeDeleted {
		w.mw.RemoveConsumerWriter(id)
	}
	w.instances = newInstancesMap
}

func (w *sharedShardWriter) Close() {
	if !w.isClosed.CAS(false, true) {
		return
	}
	w.mw.Close()
}

func (w *sharedShardWriter) QueueSize() int {
	return w.mw.QueueSize()
}

func (w *sharedShardWriter) SetMessageTTLNanos(value int64) {
	w.mw.SetMessageTTLNanos(value)
}

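// replicatedShardWriter fans each write out to a separate message writer per
// replica instance that owns the shard, so every replica receives its own
// copy of the message.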
// nolint: maligned
type replicatedShardWriter struct {
	sync.RWMutex

	shard          uint32
	numberOfShards uint32
	mPool          messagePool
	ackRouter      ackRouter
	opts           Options
	logger         *zap.Logger
	m              messageWriterMetrics

	messageWriters  map[string]messageWriter
	messageTTLNanos int64
	replicaID       uint32
	isClosed        bool
}

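// newReplicatedShardWriter creates a shard writer whose per-replica message
// writers are created and registered by UpdateInstances as instances are
// assigned to the shard.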
func newReplicatedShardWriter(
	shard, numberOfShards uint32,
	router ackRouter,
	mPool messagePool,
	opts Options,
	m messageWriterMetrics,
) shardWriter {
	return &replicatedShardWriter{
		shard:          shard,
		numberOfShards: numberOfShards,
		mPool:          mPool,
		opts:           opts,
		logger:         opts.InstrumentOptions().Logger(),
		ackRouter:      router,
		replicaID:      0,
		messageWriters: make(map[string]messageWriter),
		isClosed:       false,
		m:              m,
	}
}

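// Write writes the message to every replica's message writer.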
func (w *replicatedShardWriter) Write(rm *producer.RefCountedMessage) {
	w.RLock()
	if len(w.messageWriters) == 0 {
		w.RUnlock()
		w.m.noWritersError.Inc(1)
		w.logger.Error("no message writers available for shard", zap.Uint32("shard", rm.Shard()))
		return
	}
	for _, mw := range w.messageWriters {
		mw.Write(rm)
	}
	w.RUnlock()
}

// UpdateInstances is not thread safe and must only be called from a single
// goroutine.
func (w *replicatedShardWriter) UpdateInstances(
	instances []placement.Instance,
	cws map[string]consumerWriter,
) {
	// TODO: Schedule cleanup after the shard cutoff time to remove message
	// writers that are already cut off. Otherwise they wait until the next
	// placement change to be cleaned up.
	var (
		newMessageWriters = make(map[string]messageWriter, len(instances))
		toBeClosed        []messageWriter
		toBeAdded         = make(map[placement.Instance]consumerWriter, len(instances))
		oldMessageWriters = w.messageWriters
	)
	for _, instance := range instances {
		key := instance.Endpoint()
		if mw, ok := oldMessageWriters[key]; ok {
			newMessageWriters[key] = mw
			// Existing instance, try to update the cutover and cutoff times.
			w.updateCutoverCutoffNanos(mw, instance)
			continue
		}
		// This is a new instance.
		toBeAdded[instance] = cws[key]
	}
	for id, mw := range oldMessageWriters {
		if _, ok := newMessageWriters[id]; ok {
			// Still in the new placement.
			continue
		}
		// Keep the existing message writer and swap the consumer writer in it
		// with a new consumer writer from the placement update, so that the
		// messages buffered in the existing message writer can be retried on
		// the new consumer writer.
		if instance, cw, ok := anyKeyValueInMap(toBeAdded); ok {
			mw.AddConsumerWriter(cw)
			mw.RemoveConsumerWriter(id)
			// A replicated writer only has a single downstream consumer
			// instance at a time, so we can update the metrics with a useful
			// consumer label.
			mw.SetMetrics(mw.Metrics().withConsumer(instance.ID()))
			w.updateCutoverCutoffNanos(mw, instance)
			newMessageWriters[instance.Endpoint()] = mw
			delete(toBeAdded, instance)
			continue
		}
		toBeClosed = append(toBeClosed, mw)
	}

	// If there are more instances for this shard than before, the user has
	// increased the replication factor for the placement or for just this
	// shard.
	for instance, cw := range toBeAdded {
		// Each replica of the shard gets a unique replicated shard ID so that
		// acks can be routed back to the correct message writer, e.g. with
		// numberOfShards=1024 and shard=5 the replicas get IDs 5, 1029, 2053.
		replicatedShardID := uint64(w.replicaID*w.numberOfShards + w.shard)
		w.replicaID++
		mw := newMessageWriter(replicatedShardID, w.mPool, w.opts, w.m)
		mw.AddConsumerWriter(cw)
		mw.SetMetrics(mw.Metrics().withConsumer(instance.ID()))
		w.updateCutoverCutoffNanos(mw, instance)
		mw.Init()
		w.ackRouter.Register(replicatedShardID, mw)
		newMessageWriters[instance.Endpoint()] = mw
	}

	w.Lock()
	w.messageWriters = newMessageWriters
	w.setMessageTTLNanosWithLock(w.messageTTLNanos)
	w.Unlock()

	// If there are fewer instances for this shard than before, the user has
	// reduced the replication factor for the placement or for just this
	// shard.
	for _, mw := range toBeClosed {
		mw := mw // Capture the loop variable for the goroutine below.
		// This needs to be done in a goroutine as closing a message writer
		// blocks until all of its messages are consumed.
		go func() {
			mw.Close()
			w.ackRouter.Unregister(mw.ReplicatedShardID())
		}()
	}
}

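// updateCutoverCutoffNanos copies the shard's cutover and cutoff times from
// the instance's placement onto the message writer.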
func (w *replicatedShardWriter) updateCutoverCutoffNanos(
	mw messageWriter,
	instance placement.Instance,
) {
	s, ok := instance.Shards().Shard(w.shard)
	if !ok {
		// Unexpected.
		w.logger.Error("could not find shard on instance",
			zap.Uint32("shard", w.shard), zap.String("instance", instance.Endpoint()))
		return
	}
	mw.SetCutoffNanos(s.CutoffNanos())
	mw.SetCutoverNanos(s.CutoverNanos())
}

func (w *replicatedShardWriter) Close() {
	w.Lock()
	defer w.Unlock()

	if w.isClosed {
		return
	}
	w.isClosed = true
	for _, mw := range w.messageWriters {
		mw.Close()
	}
}

func (w *replicatedShardWriter) QueueSize() int {
	w.RLock()
	mws := w.messageWriters
	var l int
	for _, mw := range mws {
		l += mw.QueueSize()
	}
	w.RUnlock()
	return l
}

func (w *replicatedShardWriter) SetMessageTTLNanos(value int64) {
	w.Lock()
	w.messageTTLNanos = value
	w.setMessageTTLNanosWithLock(value)
	w.Unlock()
}

func (w *replicatedShardWriter) setMessageTTLNanosWithLock(value int64) {
	for _, mw := range w.messageWriters {
		mw.SetMessageTTLNanos(value)
	}
}

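// anyKeyValueInMap returns an arbitrary key/value pair from the map, relying
// on Go's unspecified map iteration order, and reports false if the map is
// empty.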
func anyKeyValueInMap(
	m map[placement.Instance]consumerWriter,
) (placement.Instance, consumerWriter, bool) {
	for key, value := range m {
		return key, value, true
	}
	return nil, nil, false
}