github.com/matrixorigin/matrixone@v0.7.0/pkg/util/export/batch_processor.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package export
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"runtime"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    27  	"github.com/matrixorigin/matrixone/pkg/logutil"
    28  	"github.com/matrixorigin/matrixone/pkg/util/batchpipe"
    29  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    30  	"github.com/matrixorigin/matrixone/pkg/util/trace/impl/motrace"
    31  )
    32  
    33  const defaultQueueSize = 1310720 // queue mem cost = 10MB
    34  
    35  // bufferHolder hold ItemBuffer content, handle buffer's new/flush/reset/reminder(base on timer) operations.
    36  // work like:
    37  // ---> Add ---> ShouldFlush or trigger.signal -----> StopAndGetBatch ---> FlushAndReset ---> Add ---> ...
    38  // #     ^                   |No                |Yes, go next call
    39  // #     |<------------------/Accept next Add
    40  type bufferHolder struct {
    41  	ctx context.Context
    42  	// name like a type
    43  	name string
    44  	// buffer is instance of batchpipe.ItemBuffer with its own elimination algorithm(like LRU, LFU)
    45  	buffer batchpipe.ItemBuffer[batchpipe.HasName, any]
    46  	// signal send signal to Collector
    47  	signal bufferSignalFunc // see awakeBufferFactory
    48  	// impl NewItemBatchHandler
    49  	impl motrace.PipeImpl
    50  	// trigger handle Reminder strategy
    51  	trigger *time.Timer
    52  
    53  	mux sync.Mutex
    54  }
    55  
    56  type bufferSignalFunc func(*bufferHolder)
    57  
    58  func newBufferHolder(ctx context.Context, name batchpipe.HasName, impl motrace.PipeImpl, signal bufferSignalFunc) *bufferHolder {
    59  	buffer := impl.NewItemBuffer(name.GetName())
    60  	b := &bufferHolder{
    61  		ctx:    ctx,
    62  		name:   name.GetName(),
    63  		buffer: buffer,
    64  		signal: signal,
    65  		impl:   impl,
    66  	}
    67  	b.mux.Lock()
    68  	defer b.mux.Unlock()
    69  	b.trigger = time.AfterFunc(time.Hour, func() {})
    70  	return b
    71  }
    72  
    73  // Start separated from newBufferHolder, should call only once, fix trigger started before first Add
    74  func (b *bufferHolder) Start() {
    75  	b.mux.Lock()
    76  	defer b.mux.Unlock()
    77  	reminder := b.buffer.(batchpipe.Reminder)
    78  	b.trigger.Stop()
    79  	b.trigger = time.AfterFunc(reminder.RemindNextAfter(), func() {
    80  		if b.mux.TryLock() {
    81  			b.mux.Unlock()
    82  		}
    83  		b.signal(b)
    84  	})
    85  }
    86  
    87  // Add call buffer.Add(), while bufferHolder is NOT readonly
    88  func (b *bufferHolder) Add(item batchpipe.HasName) {
    89  	b.mux.Lock()
    90  	buf := b.buffer
    91  	buf.Add(item)
    92  	b.mux.Unlock()
    93  	if buf.ShouldFlush() {
    94  		b.signal(b)
    95  	}
    96  }
    97  
    98  var _ generateReq = (*bufferGenerateReq)(nil)
    99  
   100  type bufferGenerateReq struct {
   101  	buffer batchpipe.ItemBuffer[batchpipe.HasName, any]
   102  	// impl NewItemBatchHandler
   103  	b *bufferHolder
   104  }
   105  
   106  func (r *bufferGenerateReq) handle(buf *bytes.Buffer) (exportReq, error) {
   107  	batch := r.buffer.GetBatch(r.b.ctx, buf)
   108  	return &bufferExportReq{
   109  		batch: batch,
   110  		b:     r.b,
   111  	}, nil
   112  }
   113  
   114  func (r *bufferGenerateReq) callback(err error) {}
   115  
   116  var _ exportReq = (*bufferExportReq)(nil)
   117  
   118  type bufferExportReq struct {
   119  	batch any
   120  	b     *bufferHolder
   121  }
   122  
   123  func (r *bufferExportReq) handle() error {
   124  	if r.batch != nil {
   125  		var flush = r.b.impl.NewItemBatchHandler(context.Background())
   126  		flush(r.batch)
   127  	} else {
   128  		logutil.Debugf("batch is nil, item: %s", r.b.name)
   129  	}
   130  	return nil
   131  }
   132  
   133  func (r *bufferExportReq) callback(err error) {}
   134  
   135  func (b *bufferHolder) getGenerateReq() generateReq {
   136  	b.mux.Lock()
   137  	defer b.mux.Unlock()
   138  	req := &bufferGenerateReq{
   139  		buffer: b.buffer,
   140  		b:      b,
   141  	}
   142  	b.buffer = b.impl.NewItemBuffer(b.name)
   143  	b.resetTrigger()
   144  	return req
   145  }
   146  
   147  // StopTrigger stop buffer's trigger(Reminder)
   148  func (b *bufferHolder) StopTrigger() bool {
   149  	b.mux.Lock()
   150  	defer b.mux.Unlock()
   151  	return b.trigger.Stop()
   152  }
   153  
   154  func (b *bufferHolder) resetTrigger() {
   155  	b.trigger.Reset(b.buffer.(batchpipe.Reminder).RemindNextAfter())
   156  }
   157  
   158  var _ motrace.BatchProcessor = (*MOCollector)(nil)
   159  
   160  // MOCollector handle all bufferPipe
   161  type MOCollector struct {
   162  	motrace.BatchProcessor
   163  	ctx context.Context
   164  
   165  	// mux control all changes on buffers
   166  	mux sync.RWMutex
   167  	// buffers maintain working buffer for each type
   168  	buffers map[string]*bufferHolder
   169  	// awakeCollect handle collect signal
   170  	awakeCollect chan batchpipe.HasName
   171  	// awakeGenerate handle generate signal
   172  	awakeGenerate chan generateReq
   173  	// awakeBatch handle export signal
   174  	awakeBatch chan exportReq
   175  
   176  	collectorCnt int // WithCollectorCnt
   177  	generatorCnt int // WithGeneratorCnt
   178  	exporterCnt  int // WithExporterCnt
   179  	// pipeImplHolder hold implement
   180  	pipeImplHolder *PipeImplHolder
   181  
   182  	// flow control
   183  	started  uint32
   184  	stopOnce sync.Once
   185  	stopWait sync.WaitGroup
   186  	stopCh   chan struct{}
   187  }
   188  
   189  type MOCollectorOption func(*MOCollector)
   190  
   191  func NewMOCollector(ctx context.Context, opts ...MOCollectorOption) *MOCollector {
   192  	c := &MOCollector{
   193  		ctx:            ctx,
   194  		buffers:        make(map[string]*bufferHolder),
   195  		awakeCollect:   make(chan batchpipe.HasName, defaultQueueSize),
   196  		awakeGenerate:  make(chan generateReq, 16),
   197  		awakeBatch:     make(chan exportReq),
   198  		stopCh:         make(chan struct{}),
   199  		collectorCnt:   runtime.NumCPU(),
   200  		generatorCnt:   runtime.NumCPU(),
   201  		exporterCnt:    runtime.NumCPU(),
   202  		pipeImplHolder: newPipeImplHolder(),
   203  	}
   204  	for _, opt := range opts {
   205  		opt(c)
   206  	}
   207  	return c
   208  }
   209  
   210  func WithCollectorCnt(cnt int) MOCollectorOption {
   211  	return MOCollectorOption(func(c *MOCollector) { c.collectorCnt = cnt })
   212  }
   213  func WithGeneratorCnt(cnt int) MOCollectorOption {
   214  	return MOCollectorOption(func(c *MOCollector) { c.generatorCnt = cnt })
   215  }
   216  func WithExporterCnt(cnt int) MOCollectorOption {
   217  	return MOCollectorOption(func(c *MOCollector) { c.exporterCnt = cnt })
   218  }
   219  
   220  func (c *MOCollector) initCnt() {
   221  	if c.collectorCnt <= 0 {
   222  		c.collectorCnt = c.pipeImplHolder.Size() * 2
   223  	}
   224  	if c.generatorCnt <= 0 {
   225  		c.generatorCnt = c.pipeImplHolder.Size()
   226  	}
   227  	if c.exporterCnt <= 0 {
   228  		c.exporterCnt = c.pipeImplHolder.Size()
   229  	}
   230  }
   231  
   232  func (c *MOCollector) Register(name batchpipe.HasName, impl motrace.PipeImpl) {
   233  	_ = c.pipeImplHolder.Put(name.GetName(), impl)
   234  }
   235  
   236  // Collect item in chan, if collector is stopped then return error
   237  func (c *MOCollector) Collect(ctx context.Context, item batchpipe.HasName) error {
   238  	select {
   239  	case <-c.stopCh:
   240  		return moerr.NewInternalError(ctx, "stopped")
   241  	case c.awakeCollect <- item:
   242  		return nil
   243  	}
   244  }
   245  
   246  // Start all goroutine worker, including collector, generator, and exporter
   247  func (c *MOCollector) Start() bool {
   248  	if atomic.LoadUint32(&c.started) != 0 {
   249  		return false
   250  	}
   251  	c.mux.Lock()
   252  	defer c.mux.Unlock()
   253  	if c.started != 0 {
   254  		return false
   255  	}
   256  	defer atomic.StoreUint32(&c.started, 1)
   257  
   258  	c.initCnt()
   259  
   260  	logutil.Infof("MOCollector Start")
   261  	for i := 0; i < c.collectorCnt; i++ {
   262  		c.stopWait.Add(1)
   263  		go c.doCollect(i)
   264  	}
   265  	for i := 0; i < c.generatorCnt; i++ {
   266  		c.stopWait.Add(1)
   267  		go c.doGenerate(i)
   268  	}
   269  	for i := 0; i < c.exporterCnt; i++ {
   270  		c.stopWait.Add(1)
   271  		go c.doExport(i)
   272  	}
   273  	return true
   274  }
   275  
   276  // doCollect handle all item accept work, send it to the corresponding buffer
   277  // goroutine worker
   278  func (c *MOCollector) doCollect(idx int) {
   279  	defer c.stopWait.Done()
   280  	ctx, span := trace.Start(c.ctx, "MOCollector.doCollect")
   281  	defer span.End()
   282  	logutil.Debugf("doCollect %dth: start", idx)
   283  loop:
   284  	for {
   285  		select {
   286  		case i := <-c.awakeCollect:
   287  			c.mux.RLock()
   288  			if buf, has := c.buffers[i.GetName()]; !has {
   289  				logutil.Debugf("doCollect %dth: init buffer for %s", idx, i.GetName())
   290  				c.mux.RUnlock()
   291  				c.mux.Lock()
   292  				if _, has := c.buffers[i.GetName()]; !has {
   293  					logutil.Debugf("doCollect %dth: init buffer done.", idx)
   294  					if impl, has := c.pipeImplHolder.Get(i.GetName()); !has {
   295  						panic(moerr.NewInternalError(ctx, "unknown item type: %s", i.GetName()))
   296  					} else {
   297  						buf = newBufferHolder(ctx, i, impl, awakeBufferFactory(c))
   298  						c.buffers[i.GetName()] = buf
   299  						buf.Add(i)
   300  						buf.Start()
   301  					}
   302  				}
   303  				c.mux.Unlock()
   304  			} else {
   305  				buf.Add(i)
   306  				c.mux.RUnlock()
   307  			}
   308  		case <-c.stopCh:
   309  			break loop
   310  		}
   311  	}
   312  	logutil.Debugf("doCollect %dth: Done.", idx)
   313  }
   314  
   315  type generateReq interface {
   316  	handle(*bytes.Buffer) (exportReq, error)
   317  	callback(error)
   318  }
   319  
   320  type exportReq interface {
   321  	handle() error
   322  	callback(error)
   323  }
   324  
   325  // awakeBufferFactory frozen buffer, send GenRequest to awake
   326  var awakeBufferFactory = func(c *MOCollector) func(holder *bufferHolder) {
   327  	return func(holder *bufferHolder) {
   328  		req := holder.getGenerateReq()
   329  		c.awakeGenerate <- req
   330  	}
   331  }
   332  
   333  // doGenerate handle buffer gen BatchRequest, which could be anything
   334  // goroutine worker
   335  func (c *MOCollector) doGenerate(idx int) {
   336  	defer c.stopWait.Done()
   337  	var buf = new(bytes.Buffer)
   338  	logutil.Debugf("doGenerate %dth: start", idx)
   339  loop:
   340  	for {
   341  		select {
   342  		case req := <-c.awakeGenerate:
   343  			if exportReq, err := req.handle(buf); err != nil {
   344  				req.callback(err)
   345  			} else {
   346  				select {
   347  				case c.awakeBatch <- exportReq:
   348  				case <-c.stopCh:
   349  				}
   350  			}
   351  		case <-c.stopCh:
   352  			break loop
   353  		}
   354  	}
   355  	logutil.Debugf("doGenerate %dth: Done.", idx)
   356  }
   357  
   358  // doExport handle BatchRequest
   359  func (c *MOCollector) doExport(idx int) {
   360  	defer c.stopWait.Done()
   361  	logutil.Debugf("doExport %dth: start", idx)
   362  loop:
   363  	for {
   364  		select {
   365  		case req := <-c.awakeBatch:
   366  			if err := req.handle(); err != nil {
   367  				req.callback(err)
   368  			}
   369  			//c.handleBatch(holder)
   370  		case <-c.stopCh:
   371  			c.mux.Lock()
   372  			for len(c.awakeBatch) > 0 {
   373  				<-c.awakeBatch
   374  			}
   375  			c.mux.Unlock()
   376  			break loop
   377  		}
   378  	}
   379  	logutil.Debugf("doExport %dth: Done.", idx)
   380  }
   381  
   382  func (c *MOCollector) Stop(graceful bool) error {
   383  	var err error
   384  	var buf = new(bytes.Buffer)
   385  	c.stopOnce.Do(func() {
   386  		for len(c.awakeCollect) > 0 {
   387  			logutil.Debug(fmt.Sprintf("doCollect left %d job", len(c.awakeCollect)), logutil.NoReportFiled())
   388  			time.Sleep(250 * time.Second)
   389  		}
   390  		c.mux.Lock()
   391  		for _, buffer := range c.buffers {
   392  			_ = buffer.StopTrigger()
   393  		}
   394  		c.mux.Unlock()
   395  		close(c.stopCh)
   396  		c.stopWait.Wait()
   397  		for _, buffer := range c.buffers {
   398  			generate := buffer.getGenerateReq()
   399  			if export, err := generate.handle(buf); err != nil {
   400  				generate.callback(err)
   401  			} else if err = export.handle(); err != nil {
   402  				export.callback(err)
   403  			}
   404  		}
   405  	})
   406  	return err
   407  }
   408  
   409  type PipeImplHolder struct {
   410  	mux   sync.RWMutex
   411  	impls map[string]motrace.PipeImpl
   412  }
   413  
   414  func newPipeImplHolder() *PipeImplHolder {
   415  	return &PipeImplHolder{
   416  		impls: make(map[string]motrace.PipeImpl),
   417  	}
   418  }
   419  
   420  func (h *PipeImplHolder) Get(name string) (motrace.PipeImpl, bool) {
   421  	h.mux.RLock()
   422  	defer h.mux.RUnlock()
   423  	impl, has := h.impls[name]
   424  	return impl, has
   425  }
   426  
   427  func (h *PipeImplHolder) Put(name string, impl motrace.PipeImpl) bool {
   428  	h.mux.Lock()
   429  	defer h.mux.Unlock()
   430  	_, has := h.impls[name]
   431  	h.impls[name] = impl
   432  	return has
   433  }
   434  
   435  func (h *PipeImplHolder) Size() int {
   436  	h.mux.Lock()
   437  	defer h.mux.Unlock()
   438  	return len(h.impls)
   439  }