github.com/matrixorigin/matrixone@v0.7.0/pkg/util/export/batch_processor.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package export 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "runtime" 22 "sync" 23 "sync/atomic" 24 "time" 25 26 "github.com/matrixorigin/matrixone/pkg/common/moerr" 27 "github.com/matrixorigin/matrixone/pkg/logutil" 28 "github.com/matrixorigin/matrixone/pkg/util/batchpipe" 29 "github.com/matrixorigin/matrixone/pkg/util/trace" 30 "github.com/matrixorigin/matrixone/pkg/util/trace/impl/motrace" 31 ) 32 33 const defaultQueueSize = 1310720 // queue mem cost = 10MB 34 35 // bufferHolder hold ItemBuffer content, handle buffer's new/flush/reset/reminder(base on timer) operations. 36 // work like: 37 // ---> Add ---> ShouldFlush or trigger.signal -----> StopAndGetBatch ---> FlushAndReset ---> Add ---> ... 38 // # ^ |No |Yes, go next call 39 // # |<------------------/Accept next Add 40 type bufferHolder struct { 41 ctx context.Context 42 // name like a type 43 name string 44 // buffer is instance of batchpipe.ItemBuffer with its own elimination algorithm(like LRU, LFU) 45 buffer batchpipe.ItemBuffer[batchpipe.HasName, any] 46 // signal send signal to Collector 47 signal bufferSignalFunc // see awakeBufferFactory 48 // impl NewItemBatchHandler 49 impl motrace.PipeImpl 50 // trigger handle Reminder strategy 51 trigger *time.Timer 52 53 mux sync.Mutex 54 } 55 56 type bufferSignalFunc func(*bufferHolder) 57 58 func newBufferHolder(ctx context.Context, name batchpipe.HasName, impl motrace.PipeImpl, signal bufferSignalFunc) *bufferHolder { 59 buffer := impl.NewItemBuffer(name.GetName()) 60 b := &bufferHolder{ 61 ctx: ctx, 62 name: name.GetName(), 63 buffer: buffer, 64 signal: signal, 65 impl: impl, 66 } 67 b.mux.Lock() 68 defer b.mux.Unlock() 69 b.trigger = time.AfterFunc(time.Hour, func() {}) 70 return b 71 } 72 73 // Start separated from newBufferHolder, should call only once, fix trigger started before first Add 74 func (b *bufferHolder) Start() { 75 b.mux.Lock() 76 defer b.mux.Unlock() 77 reminder := b.buffer.(batchpipe.Reminder) 78 b.trigger.Stop() 79 b.trigger = time.AfterFunc(reminder.RemindNextAfter(), func() { 80 if b.mux.TryLock() { 81 b.mux.Unlock() 82 } 83 b.signal(b) 84 }) 85 } 86 87 // Add call buffer.Add(), while bufferHolder is NOT readonly 88 func (b *bufferHolder) Add(item batchpipe.HasName) { 89 b.mux.Lock() 90 buf := b.buffer 91 buf.Add(item) 92 b.mux.Unlock() 93 if buf.ShouldFlush() { 94 b.signal(b) 95 } 96 } 97 98 var _ generateReq = (*bufferGenerateReq)(nil) 99 100 type bufferGenerateReq struct { 101 buffer batchpipe.ItemBuffer[batchpipe.HasName, any] 102 // impl NewItemBatchHandler 103 b *bufferHolder 104 } 105 106 func (r *bufferGenerateReq) handle(buf *bytes.Buffer) (exportReq, error) { 107 batch := r.buffer.GetBatch(r.b.ctx, buf) 108 return &bufferExportReq{ 109 batch: batch, 110 b: r.b, 111 }, nil 112 } 113 114 func (r *bufferGenerateReq) callback(err error) {} 115 116 var _ exportReq = (*bufferExportReq)(nil) 117 118 type bufferExportReq struct { 119 batch any 120 b *bufferHolder 121 } 122 123 func (r *bufferExportReq) handle() error { 124 if r.batch != nil { 125 var flush = r.b.impl.NewItemBatchHandler(context.Background()) 126 flush(r.batch) 127 } else { 128 logutil.Debugf("batch is nil, item: %s", r.b.name) 129 } 130 return nil 131 } 132 133 func (r *bufferExportReq) callback(err error) {} 134 135 func (b *bufferHolder) getGenerateReq() generateReq { 136 b.mux.Lock() 137 defer b.mux.Unlock() 138 req := &bufferGenerateReq{ 139 buffer: b.buffer, 140 b: b, 141 } 142 b.buffer = b.impl.NewItemBuffer(b.name) 143 b.resetTrigger() 144 return req 145 } 146 147 // StopTrigger stop buffer's trigger(Reminder) 148 func (b *bufferHolder) StopTrigger() bool { 149 b.mux.Lock() 150 defer b.mux.Unlock() 151 return b.trigger.Stop() 152 } 153 154 func (b *bufferHolder) resetTrigger() { 155 b.trigger.Reset(b.buffer.(batchpipe.Reminder).RemindNextAfter()) 156 } 157 158 var _ motrace.BatchProcessor = (*MOCollector)(nil) 159 160 // MOCollector handle all bufferPipe 161 type MOCollector struct { 162 motrace.BatchProcessor 163 ctx context.Context 164 165 // mux control all changes on buffers 166 mux sync.RWMutex 167 // buffers maintain working buffer for each type 168 buffers map[string]*bufferHolder 169 // awakeCollect handle collect signal 170 awakeCollect chan batchpipe.HasName 171 // awakeGenerate handle generate signal 172 awakeGenerate chan generateReq 173 // awakeBatch handle export signal 174 awakeBatch chan exportReq 175 176 collectorCnt int // WithCollectorCnt 177 generatorCnt int // WithGeneratorCnt 178 exporterCnt int // WithExporterCnt 179 // pipeImplHolder hold implement 180 pipeImplHolder *PipeImplHolder 181 182 // flow control 183 started uint32 184 stopOnce sync.Once 185 stopWait sync.WaitGroup 186 stopCh chan struct{} 187 } 188 189 type MOCollectorOption func(*MOCollector) 190 191 func NewMOCollector(ctx context.Context, opts ...MOCollectorOption) *MOCollector { 192 c := &MOCollector{ 193 ctx: ctx, 194 buffers: make(map[string]*bufferHolder), 195 awakeCollect: make(chan batchpipe.HasName, defaultQueueSize), 196 awakeGenerate: make(chan generateReq, 16), 197 awakeBatch: make(chan exportReq), 198 stopCh: make(chan struct{}), 199 collectorCnt: runtime.NumCPU(), 200 generatorCnt: runtime.NumCPU(), 201 exporterCnt: runtime.NumCPU(), 202 pipeImplHolder: newPipeImplHolder(), 203 } 204 for _, opt := range opts { 205 opt(c) 206 } 207 return c 208 } 209 210 func WithCollectorCnt(cnt int) MOCollectorOption { 211 return MOCollectorOption(func(c *MOCollector) { c.collectorCnt = cnt }) 212 } 213 func WithGeneratorCnt(cnt int) MOCollectorOption { 214 return MOCollectorOption(func(c *MOCollector) { c.generatorCnt = cnt }) 215 } 216 func WithExporterCnt(cnt int) MOCollectorOption { 217 return MOCollectorOption(func(c *MOCollector) { c.exporterCnt = cnt }) 218 } 219 220 func (c *MOCollector) initCnt() { 221 if c.collectorCnt <= 0 { 222 c.collectorCnt = c.pipeImplHolder.Size() * 2 223 } 224 if c.generatorCnt <= 0 { 225 c.generatorCnt = c.pipeImplHolder.Size() 226 } 227 if c.exporterCnt <= 0 { 228 c.exporterCnt = c.pipeImplHolder.Size() 229 } 230 } 231 232 func (c *MOCollector) Register(name batchpipe.HasName, impl motrace.PipeImpl) { 233 _ = c.pipeImplHolder.Put(name.GetName(), impl) 234 } 235 236 // Collect item in chan, if collector is stopped then return error 237 func (c *MOCollector) Collect(ctx context.Context, item batchpipe.HasName) error { 238 select { 239 case <-c.stopCh: 240 return moerr.NewInternalError(ctx, "stopped") 241 case c.awakeCollect <- item: 242 return nil 243 } 244 } 245 246 // Start all goroutine worker, including collector, generator, and exporter 247 func (c *MOCollector) Start() bool { 248 if atomic.LoadUint32(&c.started) != 0 { 249 return false 250 } 251 c.mux.Lock() 252 defer c.mux.Unlock() 253 if c.started != 0 { 254 return false 255 } 256 defer atomic.StoreUint32(&c.started, 1) 257 258 c.initCnt() 259 260 logutil.Infof("MOCollector Start") 261 for i := 0; i < c.collectorCnt; i++ { 262 c.stopWait.Add(1) 263 go c.doCollect(i) 264 } 265 for i := 0; i < c.generatorCnt; i++ { 266 c.stopWait.Add(1) 267 go c.doGenerate(i) 268 } 269 for i := 0; i < c.exporterCnt; i++ { 270 c.stopWait.Add(1) 271 go c.doExport(i) 272 } 273 return true 274 } 275 276 // doCollect handle all item accept work, send it to the corresponding buffer 277 // goroutine worker 278 func (c *MOCollector) doCollect(idx int) { 279 defer c.stopWait.Done() 280 ctx, span := trace.Start(c.ctx, "MOCollector.doCollect") 281 defer span.End() 282 logutil.Debugf("doCollect %dth: start", idx) 283 loop: 284 for { 285 select { 286 case i := <-c.awakeCollect: 287 c.mux.RLock() 288 if buf, has := c.buffers[i.GetName()]; !has { 289 logutil.Debugf("doCollect %dth: init buffer for %s", idx, i.GetName()) 290 c.mux.RUnlock() 291 c.mux.Lock() 292 if _, has := c.buffers[i.GetName()]; !has { 293 logutil.Debugf("doCollect %dth: init buffer done.", idx) 294 if impl, has := c.pipeImplHolder.Get(i.GetName()); !has { 295 panic(moerr.NewInternalError(ctx, "unknown item type: %s", i.GetName())) 296 } else { 297 buf = newBufferHolder(ctx, i, impl, awakeBufferFactory(c)) 298 c.buffers[i.GetName()] = buf 299 buf.Add(i) 300 buf.Start() 301 } 302 } 303 c.mux.Unlock() 304 } else { 305 buf.Add(i) 306 c.mux.RUnlock() 307 } 308 case <-c.stopCh: 309 break loop 310 } 311 } 312 logutil.Debugf("doCollect %dth: Done.", idx) 313 } 314 315 type generateReq interface { 316 handle(*bytes.Buffer) (exportReq, error) 317 callback(error) 318 } 319 320 type exportReq interface { 321 handle() error 322 callback(error) 323 } 324 325 // awakeBufferFactory frozen buffer, send GenRequest to awake 326 var awakeBufferFactory = func(c *MOCollector) func(holder *bufferHolder) { 327 return func(holder *bufferHolder) { 328 req := holder.getGenerateReq() 329 c.awakeGenerate <- req 330 } 331 } 332 333 // doGenerate handle buffer gen BatchRequest, which could be anything 334 // goroutine worker 335 func (c *MOCollector) doGenerate(idx int) { 336 defer c.stopWait.Done() 337 var buf = new(bytes.Buffer) 338 logutil.Debugf("doGenerate %dth: start", idx) 339 loop: 340 for { 341 select { 342 case req := <-c.awakeGenerate: 343 if exportReq, err := req.handle(buf); err != nil { 344 req.callback(err) 345 } else { 346 select { 347 case c.awakeBatch <- exportReq: 348 case <-c.stopCh: 349 } 350 } 351 case <-c.stopCh: 352 break loop 353 } 354 } 355 logutil.Debugf("doGenerate %dth: Done.", idx) 356 } 357 358 // doExport handle BatchRequest 359 func (c *MOCollector) doExport(idx int) { 360 defer c.stopWait.Done() 361 logutil.Debugf("doExport %dth: start", idx) 362 loop: 363 for { 364 select { 365 case req := <-c.awakeBatch: 366 if err := req.handle(); err != nil { 367 req.callback(err) 368 } 369 //c.handleBatch(holder) 370 case <-c.stopCh: 371 c.mux.Lock() 372 for len(c.awakeBatch) > 0 { 373 <-c.awakeBatch 374 } 375 c.mux.Unlock() 376 break loop 377 } 378 } 379 logutil.Debugf("doExport %dth: Done.", idx) 380 } 381 382 func (c *MOCollector) Stop(graceful bool) error { 383 var err error 384 var buf = new(bytes.Buffer) 385 c.stopOnce.Do(func() { 386 for len(c.awakeCollect) > 0 { 387 logutil.Debug(fmt.Sprintf("doCollect left %d job", len(c.awakeCollect)), logutil.NoReportFiled()) 388 time.Sleep(250 * time.Second) 389 } 390 c.mux.Lock() 391 for _, buffer := range c.buffers { 392 _ = buffer.StopTrigger() 393 } 394 c.mux.Unlock() 395 close(c.stopCh) 396 c.stopWait.Wait() 397 for _, buffer := range c.buffers { 398 generate := buffer.getGenerateReq() 399 if export, err := generate.handle(buf); err != nil { 400 generate.callback(err) 401 } else if err = export.handle(); err != nil { 402 export.callback(err) 403 } 404 } 405 }) 406 return err 407 } 408 409 type PipeImplHolder struct { 410 mux sync.RWMutex 411 impls map[string]motrace.PipeImpl 412 } 413 414 func newPipeImplHolder() *PipeImplHolder { 415 return &PipeImplHolder{ 416 impls: make(map[string]motrace.PipeImpl), 417 } 418 } 419 420 func (h *PipeImplHolder) Get(name string) (motrace.PipeImpl, bool) { 421 h.mux.RLock() 422 defer h.mux.RUnlock() 423 impl, has := h.impls[name] 424 return impl, has 425 } 426 427 func (h *PipeImplHolder) Put(name string, impl motrace.PipeImpl) bool { 428 h.mux.Lock() 429 defer h.mux.Unlock() 430 _, has := h.impls[name] 431 h.impls[name] = impl 432 return has 433 } 434 435 func (h *PipeImplHolder) Size() int { 436 h.mux.Lock() 437 defer h.mux.Unlock() 438 return len(h.impls) 439 }