github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/logtail/collector.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logtail
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"sync"
    22  	"sync/atomic"
    23  	"time"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/container/types"
    27  	"github.com/matrixorigin/matrixone/pkg/logutil"
    28  	"github.com/matrixorigin/matrixone/pkg/txn/clock"
    29  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/catalog"
    30  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/model"
    31  	"github.com/tidwall/btree"
    32  )
    33  
// TempFilter is a concurrency-safe, one-shot set of table ids: an id
// registered via Add makes exactly one subsequent Check(id) report true
// (the hit consumes the registration). It is used as a debug/ops hook to
// skip tables during dirty-tree compaction (see tryCompactTree).
type TempFilter struct {
	sync.RWMutex
	m map[uint64]bool
}

// TempFKey is the context key that enables TempF filtering inside
// tryCompactTree; any non-nil value stored under this key turns it on.
type TempFKey struct{}
    40  
    41  func (f *TempFilter) Add(id uint64) {
    42  	f.Lock()
    43  	defer f.Unlock()
    44  	f.m[id] = true
    45  }
    46  
    47  func (f *TempFilter) Check(id uint64) (skip bool) {
    48  	f.Lock()
    49  	defer f.Unlock()
    50  	if _, ok := f.m[id]; ok {
    51  		delete(f.m, id)
    52  		return true
    53  	}
    54  	return false
    55  }
    56  
    57  var TempF *TempFilter
    58  
    59  func init() {
    60  	TempF = &TempFilter{
    61  		m: make(map[uint64]bool),
    62  	}
    63  }
    64  
// Collector scans the logtail over time ranges, accumulates the dirty
// (unflushed) trees it finds, and serves merged views of them.
type Collector interface {
	String() string
	// Run performs one collection round over the next pending range,
	// lagging `lag` behind the current clock time.
	Run(lag time.Duration)
	// ScanInRange collects the dirty tree in [from, to] and the number
	// of dirty items found.
	ScanInRange(from, to types.TS) (*DirtyTreeEntry, int)
	// ScanInRangePruned is ScanInRange followed by a compaction pass
	// that drops already-flushed or dropped parts of the tree.
	ScanInRangePruned(from, to types.TS) *DirtyTreeEntry
	// IsCommitted reports whether everything in [from, to] is committed.
	IsCommitted(from, to types.TS) bool
	// GetAndRefreshMerged returns the merged view, rebuilding it only
	// when storage has advanced past the cached view.
	GetAndRefreshMerged() *DirtyTreeEntry
	// Merge rebuilds the merged view from all stored entries.
	Merge() *DirtyTreeEntry
	// GetMaxLSN returns the max LSN among txns prepared in [from, to].
	GetMaxLSN(from, to types.TS) uint64
	// Init seeds the collector's high-water mark; scans start after maxts.
	Init(maxts types.TS)
}
    76  
// DirtyEntryInterceptor processes catalog entries that remain dirty
// after pruning (see tryCompactTree's interceptor.OnObject calls).
type DirtyEntryInterceptor = catalog.Processor

// DirtyTreeEntry couples a dirty tree with the [start, end] time range
// it was collected from. The embedded RWMutex guards in-place tree
// mutation during compaction (see cleanupStorage / Merge).
type DirtyTreeEntry struct {
	sync.RWMutex
	start, end types.TS
	tree       *model.Tree
}
    84  
    85  func NewEmptyDirtyTreeEntry() *DirtyTreeEntry {
    86  	return &DirtyTreeEntry{
    87  		tree: model.NewTree(),
    88  	}
    89  }
    90  
    91  func NewDirtyTreeEntry(start, end types.TS, tree *model.Tree) *DirtyTreeEntry {
    92  	entry := NewEmptyDirtyTreeEntry()
    93  	entry.start = start
    94  	entry.end = end
    95  	entry.tree = tree
    96  	return entry
    97  }
    98  
    99  func (entry *DirtyTreeEntry) Merge(o *DirtyTreeEntry) {
   100  	if entry.start.Greater(&o.start) {
   101  		entry.start = o.start
   102  	}
   103  	if entry.end.Less(&o.end) {
   104  		entry.end = o.end
   105  	}
   106  	entry.tree.Merge(o.tree)
   107  }
   108  
// IsEmpty reports whether the entry holds no dirty tables at all.
func (entry *DirtyTreeEntry) IsEmpty() bool {
	return entry.tree.IsEmpty()
}

// GetTimeRange returns the time range the entry covers.
func (entry *DirtyTreeEntry) GetTimeRange() (from, to types.TS) {
	return entry.start, entry.end
}

// GetTree returns the underlying dirty tree (not a copy; callers share
// the entry's tree).
func (entry *DirtyTreeEntry) GetTree() (tree *model.Tree) {
	return entry.tree
}
   120  
   121  func (entry *DirtyTreeEntry) String() string {
   122  	var buf bytes.Buffer
   123  	_, _ = buf.WriteString(
   124  		fmt.Sprintf("DirtyTreeEntry[%s=>%s]\n",
   125  			entry.start.ToString(),
   126  			entry.end.ToString()))
   127  	_, _ = buf.WriteString(entry.tree.String())
   128  	return buf.String()
   129  }
   130  
// dirtyCollector is the default Collector implementation: it pulls
// dirty trees from the logtail Manager, stores them as time-ranged
// entries in a btree, and maintains a lazily refreshed merged view.
type dirtyCollector struct {
	// sourcer provides logtail readers for arbitrary time ranges
	sourcer *Manager

	// context
	catalog     *catalog.Catalog
	clock       *types.TsAlloctor
	interceptor DirtyEntryInterceptor

	// storage holds one DirtyTreeEntry per scanned range; maxTs is the
	// upper bound of everything scanned so far (the high-water mark)
	storage struct {
		sync.RWMutex
		entries *btree.BTreeG[*DirtyTreeEntry]
		maxTs   types.TS
	}
	// merged caches the union of all stored entries up to merged.end
	merged atomic.Pointer[DirtyTreeEntry]
}
   148  
// NewDirtyCollector wires a dirtyCollector to its logtail source,
// clock, catalog and interceptor. The merged view starts empty.
func NewDirtyCollector(
	sourcer *Manager,
	clock clock.Clock,
	catalog *catalog.Catalog,
	interceptor DirtyEntryInterceptor) *dirtyCollector {
	collector := &dirtyCollector{
		sourcer:     sourcer,
		catalog:     catalog,
		interceptor: interceptor,
		clock:       types.NewTsAlloctor(clock),
	}
	// Entries are ordered by their time range. NOTE(review): this
	// comparator is only a valid ordering if stored ranges never
	// overlap (tryStoreEntry enforces adjacency via maxTs.Next()) —
	// confirm overlapping entries can never be inserted.
	collector.storage.entries = btree.NewBTreeGOptions(
		func(a, b *DirtyTreeEntry) bool {
			return a.start.Less(&b.start) && a.end.Less(&b.end)
		}, btree.Options{
			// the storage mutex already serializes access to the btree
			NoLocks: true,
		})

	collector.merged.Store(NewEmptyDirtyTreeEntry())
	return collector
}
// Init seeds the collector's high-water mark; subsequent scans start
// from maxts.Next(). Intended to be called once before Run.
func (d *dirtyCollector) Init(maxts types.TS) {
	d.storage.maxTs = maxts
}
// Run performs one collection round: pick the next unscanned range
// (lagging `lag` behind now), scan it for dirty data, store the result,
// compact already-flushed entries out of storage, and refresh the
// merged view.
func (d *dirtyCollector) Run(lag time.Duration) {
	from, to := d.findRange(lag)

	// stale range found, skip this run
	if to.IsEmpty() {
		return
	}

	d.rangeScanAndUpdate(from, to)
	d.cleanupStorage()
	d.GetAndRefreshMerged()
}
   185  
// ScanInRangePruned is ScanInRange followed by a tryCompactTree pass
// that prunes already-flushed or dropped parts of the tree in place.
// Storing a value under TempFKey (any non-nil value works; 42 is
// arbitrary) enables the TempF table-skip filter inside tryCompactTree.
func (d *dirtyCollector) ScanInRangePruned(from, to types.TS) (
	tree *DirtyTreeEntry) {
	tree, _ = d.ScanInRange(from, to)
	ctx := context.WithValue(context.Background(), TempFKey{}, 42)
	// a pruning failure here is unexpected and unrecoverable
	if err := d.tryCompactTree(ctx, d.interceptor, tree.tree, from, to); err != nil {
		panic(err)
	}
	return
}
   195  
   196  func (d *dirtyCollector) GetMaxLSN(from, to types.TS) uint64 {
   197  	reader := d.sourcer.GetReader(from, to)
   198  	return reader.GetMaxLSN()
   199  }
   200  func (d *dirtyCollector) ScanInRange(from, to types.TS) (
   201  	entry *DirtyTreeEntry, count int) {
   202  	reader := d.sourcer.GetReader(from, to)
   203  	tree, count := reader.GetDirty()
   204  
   205  	// make a entry
   206  	entry = &DirtyTreeEntry{
   207  		start: from,
   208  		end:   to,
   209  		tree:  tree,
   210  	}
   211  	return
   212  }
   213  
   214  func (d *dirtyCollector) IsCommitted(from, to types.TS) bool {
   215  	reader := d.sourcer.GetReader(from, to)
   216  	return reader.IsCommitted()
   217  }
   218  
// DirtyCount returns the unflushed table and object counts taken from
// the (freshly refreshed) merged view.
func (d *dirtyCollector) DirtyCount() (tblCnt, objCnt int) {
	merged := d.GetAndRefreshMerged()
	tblCnt = merged.tree.TableCount()
	// objects are counted per table; sum them up
	for _, tblTree := range merged.tree.Tables {
		objCnt += len(tblTree.Objs)
	}
	return
}
   228  
   229  func (d *dirtyCollector) String() string {
   230  	merged := d.GetAndRefreshMerged()
   231  	return merged.tree.String()
   232  }
   233  
// GetAndRefreshMerged returns the merged view of all stored entries,
// rebuilding it only when storage's maxTs has advanced past the cached
// view's end timestamp.
func (d *dirtyCollector) GetAndRefreshMerged() (merged *DirtyTreeEntry) {
	merged = d.merged.Load()
	d.storage.RLock()
	maxTs := d.storage.maxTs
	d.storage.RUnlock()
	// cached view already covers everything stored — reuse it
	if maxTs.LessEq(&merged.end) {
		return
	}
	merged = d.Merge()
	// publish the fresh view; on a lost race the caller still gets its
	// own freshly merged result
	d.tryUpdateMerged(merged)
	return
}
   246  
   247  func (d *dirtyCollector) Merge() *DirtyTreeEntry {
   248  	// get storage snapshot and work on it
   249  	snapshot, maxTs := d.getStorageSnapshot()
   250  
   251  	merged := NewEmptyDirtyTreeEntry()
   252  	merged.end = maxTs
   253  
   254  	// scan base on the snapshot
   255  	// merge all trees of the entry
   256  	snapshot.Scan(func(entry *DirtyTreeEntry) bool {
   257  		entry.RLock()
   258  		defer entry.RUnlock()
   259  		merged.tree.Merge(entry.tree)
   260  		return true
   261  	})
   262  
   263  	return merged
   264  }
   265  
   266  func (d *dirtyCollector) tryUpdateMerged(merged *DirtyTreeEntry) (updated bool) {
   267  	var old *DirtyTreeEntry
   268  	for {
   269  		old = d.merged.Load()
   270  		if old.end.GreaterEq(&merged.end) {
   271  			break
   272  		}
   273  		if d.merged.CompareAndSwap(old, merged) {
   274  			updated = true
   275  			break
   276  		}
   277  	}
   278  	return
   279  }
   280  
// findRange returns the next time range to scan: (storage.maxTs, now-lag].
// Both results stay zero when the lagged bound has not advanced past the
// high-water mark (callers check to.IsEmpty()).
func (d *dirtyCollector) findRange(lagDuration time.Duration) (from, to types.TS) {
	now := d.clock.Alloc()
	// a deliberate lag is made here for flushing and checkpoint to
	// avoid fierce competition on the very new ablock, whose PrepareCompact probably
	// returns false
	// NOTE(review): Physical() is assumed to be in nanoseconds so that
	// subtracting int64(lagDuration) is unit-correct — confirm against
	// types.BuildTS usage elsewhere.
	lag := types.BuildTS(now.Physical()-int64(lagDuration), now.Logical())
	d.storage.RLock()
	defer d.storage.RUnlock()
	if lag.LessEq(&d.storage.maxTs) {
		return
	}
	from, to = d.storage.maxTs.Next(), lag
	return
}
   295  
   296  func (d *dirtyCollector) rangeScanAndUpdate(from, to types.TS) (updated bool) {
   297  	entry, _ := d.ScanInRange(from, to)
   298  
   299  	// try to store the entry
   300  	updated = d.tryStoreEntry(entry)
   301  	return
   302  }
   303  
   304  func (d *dirtyCollector) tryStoreEntry(entry *DirtyTreeEntry) (ok bool) {
   305  	ok = true
   306  	d.storage.Lock()
   307  	defer d.storage.Unlock()
   308  
   309  	// storage was updated before
   310  	maxTS := d.storage.maxTs.Next()
   311  	if !entry.start.Equal(&maxTS) {
   312  		ok = false
   313  		return
   314  	}
   315  
   316  	// update storage maxTs
   317  	d.storage.maxTs = entry.end
   318  
   319  	// don't store empty entry
   320  	if entry.tree.IsEmpty() {
   321  		return
   322  	}
   323  
   324  	d.storage.entries.Set(entry)
   325  	return
   326  }
   327  
// getStorageSnapshot returns a snapshot copy of the stored entries
// together with the current maxTs. A write lock is taken even though
// nothing is logically modified — presumably because the btree was
// created with NoLocks and Copy mutates shared internal state
// (copy-on-write marking); confirm against tidwall/btree semantics.
func (d *dirtyCollector) getStorageSnapshot() (ss *btree.BTreeG[*DirtyTreeEntry], ts types.TS) {
	d.storage.Lock()
	defer d.storage.Unlock()
	ss = d.storage.entries.Copy()
	ts = d.storage.maxTs
	return
}
   335  
// cleanupStorage scans current dirty entries, removes all flushed or
// not-found ones, and drives the interceptor on the remaining object
// entries. Works on a snapshot so the storage lock is only held for the
// final deletions.
func (d *dirtyCollector) cleanupStorage() {
	toDeletes := make([]*DirtyTreeEntry, 0)

	// get a snapshot of entries
	entries, _ := d.getStorageSnapshot()

	// scan all entries in the storage
	// try compact the dirty tree for each entry
	// if the dirty tree is empty, delete the specified entry from the storage
	entries.Scan(func(entry *DirtyTreeEntry) bool {
		// write lock: tryCompactTree shrinks the tree in place
		entry.Lock()
		defer entry.Unlock()
		// dirty blocks within the time range has been flushed
		// exclude the related dirty tree from the foreset
		if entry.tree.IsEmpty() {
			toDeletes = append(toDeletes, entry)
			return true
		}
		// best-effort: on failure the entry is retried on the next round
		if err := d.tryCompactTree(context.Background(), d.interceptor, entry.tree, entry.start, entry.end); err != nil {
			logutil.Warnf("error: interceptor on dirty tree: %v", err)
		}
		if entry.tree.IsEmpty() {
			toDeletes = append(toDeletes, entry)
		}
		return true
	})

	if len(toDeletes) == 0 {
		return
	}

	// remove entries with empty dirty tree from the storage
	d.storage.Lock()
	defer d.storage.Unlock()
	for _, tree := range toDeletes {
		d.storage.entries.Delete(tree)
	}
}
   375  
   376  // iter the tree and call interceptor to process block. flushed block, empty obj and table will be removed from the tree
   377  func (d *dirtyCollector) tryCompactTree(
   378  	ctx context.Context,
   379  	interceptor DirtyEntryInterceptor,
   380  	tree *model.Tree, from, to types.TS) (err error) {
   381  	var (
   382  		db  *catalog.DBEntry
   383  		tbl *catalog.TableEntry
   384  		obj *catalog.ObjectEntry
   385  	)
   386  	for id, dirtyTable := range tree.Tables {
   387  		// remove empty tables
   388  		if dirtyTable.Compact() {
   389  			tree.Shrink(id)
   390  			continue
   391  		}
   392  
   393  		if db, err = d.catalog.GetDatabaseByID(dirtyTable.DbID); err != nil {
   394  			if moerr.IsMoErrCode(err, moerr.OkExpectedEOB) {
   395  				tree.Shrink(id)
   396  				err = nil
   397  				continue
   398  			}
   399  			break
   400  		}
   401  		if tbl, err = db.GetTableEntryByID(dirtyTable.ID); err != nil {
   402  			if moerr.IsMoErrCode(err, moerr.OkExpectedEOB) {
   403  				tree.Shrink(id)
   404  				err = nil
   405  				continue
   406  			}
   407  			break
   408  		}
   409  
   410  		tbl.Stats.RLock()
   411  		lastFlush := tbl.Stats.LastFlush
   412  		if lastFlush.GreaterEq(&to) {
   413  			tree.Shrink(id)
   414  			tbl.Stats.RUnlock()
   415  			continue
   416  		}
   417  		tbl.Stats.RUnlock()
   418  
   419  		if x := ctx.Value(TempFKey{}); x != nil && TempF.Check(tbl.ID) {
   420  			logutil.Infof("temp filter skip table %v-%v", tbl.ID, tbl.GetLastestSchemaLocked().Name)
   421  			tree.Shrink(id)
   422  			continue
   423  		}
   424  
   425  		for id, dirtyObj := range dirtyTable.Objs {
   426  			if obj, err = tbl.GetObjectByID(dirtyObj.ID); err != nil {
   427  				if moerr.IsMoErrCode(err, moerr.OkExpectedEOB) {
   428  					dirtyTable.Shrink(id)
   429  					err = nil
   430  					continue
   431  				}
   432  				return
   433  			}
   434  			var calibration int
   435  			calibration, err = obj.GetObjectData().RunCalibration()
   436  			if err != nil {
   437  				logutil.Warnf("get object rows failed, obj %v, err: %v", obj.ID.String(), err)
   438  				continue
   439  			}
   440  			if calibration == 0 {
   441  				// TODO: may be put it to post replay process
   442  				// FIXME
   443  				if obj.HasPersistedData() {
   444  					obj.GetObjectData().TryUpgrade()
   445  				}
   446  				dirtyTable.Shrink(id)
   447  				continue
   448  			}
   449  			if !obj.IsAppendable() {
   450  				newFrom := from
   451  				if lastFlush.Greater(&newFrom) {
   452  					newFrom = lastFlush
   453  				}
   454  				// sometimes, delchain is no cleared after flushing table tail.
   455  				// the reason is still unknown, but here bumping the check from ts to lastFlush is correct anyway.
   456  				found, _ := obj.GetObjectData().HasDeleteIntentsPreparedIn(newFrom, to)
   457  				if !found {
   458  					dirtyTable.Shrink(id)
   459  					continue
   460  				}
   461  			}
   462  			if err = interceptor.OnObject(obj); err != nil {
   463  				return
   464  			}
   465  		}
   466  	}
   467  	tree.Compact()
   468  	return
   469  }