github.com/m3db/m3@v1.5.0/src/msg/producer/writer/shard_writer.go

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package writer

import (
	"sync"

	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/msg/producer"

	"go.uber.org/atomic"
	"go.uber.org/zap"
)

type shardWriter interface {
	// Write writes the reference counted message. This must be thread safe.
	Write(rm *producer.RefCountedMessage)

	// UpdateInstances updates the instances responsible for this shard.
	UpdateInstances(
		instances []placement.Instance,
		cws map[string]consumerWriter,
	)

	// SetMessageTTLNanos sets the message TTL in nanoseconds.
	SetMessageTTLNanos(value int64)

	// Close closes the shard writer.
	Close()

	// QueueSize returns the number of messages queued for the shard.
	QueueSize() int
}

// sharedShardWriter writes messages for a shard through a single message
// writer that is shared by all consumer instances owning the shard.
type sharedShardWriter struct {
	instances map[string]struct{}
	mw        messageWriter
	isClosed  *atomic.Bool
}

func newSharedShardWriter(
	shard uint32,
	router ackRouter,
	mPool messagePool,
	opts Options,
	m messageWriterMetrics,
) shardWriter {
	replicatedShardID := uint64(shard)
	mw := newMessageWriter(replicatedShardID, mPool, opts, m)
	mw.Init()
	router.Register(replicatedShardID, mw)
	return &sharedShardWriter{
		instances: make(map[string]struct{}),
		mw:        mw,
		isClosed:  atomic.NewBool(false),
	}
}

func (w *sharedShardWriter) Write(rm *producer.RefCountedMessage) {
	w.mw.Write(rm)
}

// This is not thread safe; it must be called from a single thread.
func (w *sharedShardWriter) UpdateInstances(
	instances []placement.Instance,
	cws map[string]consumerWriter,
) {
	var (
		newInstancesMap = make(map[string]struct{}, len(instances))
		toBeDeleted     = w.instances
	)
	for _, instance := range instances {
		id := instance.Endpoint()
		newInstancesMap[id] = struct{}{}
		if _, ok := toBeDeleted[id]; ok {
			// Existing instance.
			delete(toBeDeleted, id)
			continue
		}
		// Add the consumer writer to the message writer.
		w.mw.AddConsumerWriter(cws[id])
	}
	for id := range toBeDeleted {
		w.mw.RemoveConsumerWriter(id)
	}
	w.instances = newInstancesMap
}

func (w *sharedShardWriter) Close() {
	if !w.isClosed.CAS(false, true) {
		return
	}
	w.mw.Close()
}

func (w *sharedShardWriter) QueueSize() int {
	return w.mw.QueueSize()
}

func (w *sharedShardWriter) SetMessageTTLNanos(value int64) {
	w.mw.SetMessageTTLNanos(value)
}

// replicatedShardWriter maintains one message writer per consumer instance
// that owns the shard, so every owning instance receives every message.
// nolint: maligned
type replicatedShardWriter struct {
	sync.RWMutex

	shard          uint32
	numberOfShards uint32
	mPool          messagePool
	ackRouter      ackRouter
	opts           Options
	logger         *zap.Logger
	m              messageWriterMetrics

	messageWriters  map[string]messageWriter
	messageTTLNanos int64
	replicaID       uint32
	isClosed        bool
}

func newReplicatedShardWriter(
	shard, numberOfShards uint32,
	router ackRouter,
	mPool messagePool,
	opts Options,
	m messageWriterMetrics,
) shardWriter {
	return &replicatedShardWriter{
		shard:          shard,
		numberOfShards: numberOfShards,
		mPool:          mPool,
		opts:           opts,
		logger:         opts.InstrumentOptions().Logger(),
		ackRouter:      router,
		replicaID:      0,
		messageWriters: make(map[string]messageWriter),
		isClosed:       false,
		m:              m,
	}
}

func (w *replicatedShardWriter) Write(rm *producer.RefCountedMessage) {
	w.RLock()
	if len(w.messageWriters) == 0 {
		w.RUnlock()
		w.m.noWritersError.Inc(1)
		w.logger.Error("no message writers available for shard", zap.Uint32("shard", rm.Shard()))
		return
	}
	for _, mw := range w.messageWriters {
		mw.Write(rm)
	}
	w.RUnlock()
}

// This is not thread safe; it must be called from a single thread.
func (w *replicatedShardWriter) UpdateInstances(
	instances []placement.Instance,
	cws map[string]consumerWriter,
) {
	// TODO: Schedule a cleanup after the shard cutoff time for message writers
	// that are already cut off. Otherwise they will wait until the next
	// placement change to be cleaned up.
	var (
		newMessageWriters = make(map[string]messageWriter, len(instances))
		toBeClosed        []messageWriter
		toBeAdded         = make(map[placement.Instance]consumerWriter, len(instances))
		oldMessageWriters = w.messageWriters
	)
	for _, instance := range instances {
		key := instance.Endpoint()
		if mw, ok := oldMessageWriters[key]; ok {
			newMessageWriters[key] = mw
			// Existing instance; try to update cutover and cutoff times.
			w.updateCutoverCutoffNanos(mw, instance)
			continue
		}
		// This is a new instance.
		toBeAdded[instance] = cws[key]
	}
	for id, mw := range oldMessageWriters {
		if _, ok := newMessageWriters[id]; ok {
			// Still in the new placement.
			continue
		}
		// Keep the existing message writer and swap the consumer writer in it
		// with a new consumer writer from the placement update, so that the
		// messages buffered in the existing message writer can be tried on
		// the new consumer writer.
		if instance, cw, ok := anyKeyValueInMap(toBeAdded); ok {
			mw.AddConsumerWriter(cw)
			mw.RemoveConsumerWriter(id)
			// A replicated writer only has a single downstream consumer instance
			// at a time, so we can update the metrics with a useful consumer label.
			mw.SetMetrics(mw.Metrics().withConsumer(instance.ID()))
			w.updateCutoverCutoffNanos(mw, instance)
			newMessageWriters[instance.Endpoint()] = mw
			delete(toBeAdded, instance)
			continue
		}
		toBeClosed = append(toBeClosed, mw)
	}

	// There are more instances for this shard than before; this happens when
	// the user increased the replication factor for the placement or for just
	// this shard.
	for instance, cw := range toBeAdded {
		replicatedShardID := uint64(w.replicaID*w.numberOfShards + w.shard)
		w.replicaID++
		mw := newMessageWriter(replicatedShardID, w.mPool, w.opts, w.m)
		mw.AddConsumerWriter(cw)
		mw.SetMetrics(mw.Metrics().withConsumer(instance.ID()))
		w.updateCutoverCutoffNanos(mw, instance)
		mw.Init()
		w.ackRouter.Register(replicatedShardID, mw)
		newMessageWriters[instance.Endpoint()] = mw
	}

	w.Lock()
	w.messageWriters = newMessageWriters
	w.setMessageTTLNanosWithLock(w.messageTTLNanos)
	w.Unlock()

	// There are fewer instances for this shard than before; this happens when
	// the user reduced the replication factor for the placement or for just
	// this shard.
	for _, mw := range toBeClosed {
		mw := mw // Capture the loop variable for use in the goroutine below.
		// This needs to be done in a goroutine as closing a message writer will
		// block until all messages are consumed.
		go func() {
			mw.Close()
			w.ackRouter.Unregister(mw.ReplicatedShardID())
		}()
	}
}

func (w *replicatedShardWriter) updateCutoverCutoffNanos(
	mw messageWriter,
	instance placement.Instance,
) {
	s, ok := instance.Shards().Shard(w.shard)
	if !ok {
		// Unexpected.
		w.logger.Error("could not find shard on instance",
			zap.Uint32("shard", w.shard), zap.String("instance", instance.Endpoint()))
		return
	}
	mw.SetCutoffNanos(s.CutoffNanos())
	mw.SetCutoverNanos(s.CutoverNanos())
}

func (w *replicatedShardWriter) Close() {
	w.Lock()
	defer w.Unlock()

	if w.isClosed {
		return
	}
	w.isClosed = true
	for _, mw := range w.messageWriters {
		mw.Close()
	}
}

func (w *replicatedShardWriter) QueueSize() int {
	w.RLock()
	mws := w.messageWriters
	var l int
	for _, mw := range mws {
		l += mw.QueueSize()
	}
	w.RUnlock()
	return l
}

func (w *replicatedShardWriter) SetMessageTTLNanos(value int64) {
	w.Lock()
	w.messageTTLNanos = value
	w.setMessageTTLNanosWithLock(value)
	w.Unlock()
}

func (w *replicatedShardWriter) setMessageTTLNanosWithLock(value int64) {
	for _, mw := range w.messageWriters {
		mw.SetMessageTTLNanos(value)
	}
}

func anyKeyValueInMap(
	m map[placement.Instance]consumerWriter,
) (placement.Instance, consumerWriter, bool) {
	for key, value := range m {
		return key, value, true
	}
	return nil, nil, false
}
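
// The sketch below is a minimal illustration of the intended call pattern for
// a shardWriter, assuming the caller has already wired up an ackRouter, a
// messagePool, Options, and messageWriterMetrics elsewhere in the producer
// writer. The placeholder names (exampleRouter, examplePool, exampleOpts,
// exampleMetrics, instancesForShard, consumerWritersByEndpoint,
// refCountedMessage) are illustrative and not part of this package:
//
//	sw := newReplicatedShardWriter(shard, numberOfShards, exampleRouter, examplePool, exampleOpts, exampleMetrics)
//	sw.UpdateInstances(instancesForShard, consumerWritersByEndpoint)
//	sw.Write(refCountedMessage)
//	sw.SetMessageTTLNanos(int64(time.Minute)) // message TTL expressed in nanoseconds
//	sw.Close()                                // blocks until buffered messages are consumed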