github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/causetstore/petri/petri.go

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package petri
    15  
    16  import (
    17  	"context"
    18  	"sync"
    19  	"sync/atomic"
    20  	"time"
    21  	"unsafe"
    22  
    23  	"github.com/ngaut/pools"
    24  	"github.com/ngaut/sync2"
    25  	"github.com/whtcorpsinc/BerolinaSQL/ast"
    26  	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
    27  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    28  	"github.com/whtcorpsinc/errors"
    29  	"github.com/whtcorpsinc/failpoint"
    30  	"github.com/whtcorpsinc/milevadb/bindinfo"
    31  	"github.com/whtcorpsinc/milevadb/causetstore/einsteindb"
    32  	"github.com/whtcorpsinc/milevadb/config"
    33  	"github.com/whtcorpsinc/milevadb/dbs"
    34  	"github.com/whtcorpsinc/milevadb/ekv"
    35  	"github.com/whtcorpsinc/milevadb/errno"
    36  	"github.com/whtcorpsinc/milevadb/metrics"
    37  	"github.com/whtcorpsinc/milevadb/petri/infosync"
    38  	"github.com/whtcorpsinc/milevadb/privilege/privileges"
    39  	"github.com/whtcorpsinc/milevadb/schemareplicant"
    40  	"github.com/whtcorpsinc/milevadb/schemareplicant/perfschema"
    41  	"github.com/whtcorpsinc/milevadb/soliton"
    42  	"github.com/whtcorpsinc/milevadb/soliton/expensivequery"
    43  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    44  	"github.com/whtcorpsinc/milevadb/soliton/petriutil"
    45  	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
    46  	"github.com/whtcorpsinc/milevadb/spacetime"
    47  	"github.com/whtcorpsinc/milevadb/statistics/handle"
    48  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    49  	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
    50  	"github.com/whtcorpsinc/milevadb/telemetry"
    51  	"github.com/whtcorpsinc/milevadb/tenant"
    52  	"go.etcd.io/etcd/clientv3"
    53  	"go.uber.org/zap"
    54  	"google.golang.org/grpc"
    55  	"google.golang.org/grpc/keepalive"
    56  )
    57  
    58  // Petri represents a storage space. Different petris can use the same database name.
    59  // Multiple petris can be used in parallel without synchronization.
    60  type Petri struct {
    61  	causetstore          ekv.CausetStorage
    62  	infoHandle           *schemareplicant.Handle
    63  	privHandle           *privileges.Handle
    64  	bindHandle           *bindinfo.BindHandle
    65  	statsHandle          unsafe.Pointer
    66  	statsLease           time.Duration
    67  	dbs                  dbs.DBS
    68  	info                 *infosync.InfoSyncer
    69  	m                    sync.Mutex
    70  	SchemaValidator      SchemaValidator
    71  	sysStochastikPool    *stochastikPool
    72  	exit                 chan struct{}
    73  	etcdClient           *clientv3.Client
    74  	gvc                  GlobalVariableCache
    75  	slowQuery            *topNSlowQueries
    76  	expensiveQueryHandle *expensivequery.Handle
    77  	wg                   sync.WaitGroup
    78  	statsUFIDelating     sync2.AtomicInt32
    79  	cancel               context.CancelFunc
    80  	indexUsageSyncLease  time.Duration
    81  }
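        // Accessor sketch (illustrative, not part of the original source; every method shown
        // below is defined later in this file):
        //
        //	is := do.SchemaReplicant() // current schemaReplicant
        //	d := do.DBS()              // DBS interface
        //	sh := do.StatsHandle()     // statistics handle; nil until UFIDelateBlockStatsLoop or CreateStatsHandle runs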
    82  
    83  // loadSchemaReplicant loads the schemareplicant at startTS into handle. usedSchemaVersion is the currently used
    84  // schemareplicant version; if it is the same as the schemaReplicant version at startTS, we don't need to reload.
    85  // It returns the latest schemaReplicant version, the changed causet IDs, whether it's a full load, and an error.
    86  func (do *Petri) loadSchemaReplicant(handle *schemareplicant.Handle, usedSchemaVersion int64,
    87  	startTS uint64) (neededSchemaVersion int64, change *einsteindb.RelatedSchemaChange, fullLoad bool, err error) {
    88  	snapshot, err := do.causetstore.GetSnapshot(ekv.NewVersion(startTS))
    89  	if err != nil {
    90  		return 0, nil, fullLoad, err
    91  	}
    92  	m := spacetime.NewSnapshotMeta(snapshot)
    93  	neededSchemaVersion, err = m.GetSchemaVersion()
    94  	if err != nil {
    95  		return 0, nil, fullLoad, err
    96  	}
    97  	if usedSchemaVersion != 0 && usedSchemaVersion == neededSchemaVersion {
    98  		return neededSchemaVersion, nil, fullLoad, nil
    99  	}
   100  
   101  	// UFIDelate self schemaReplicant version to etcd.
   102  	defer func() {
   103  		// There are two possibilities for not uFIDelating the self schemaReplicant version to etcd.
   104  		// 1. Failed to load the schemaReplicant information.
   105  		// 2. When users use history read feature, the neededSchemaVersion isn't the latest schemaReplicant version.
   106  		if err != nil || neededSchemaVersion < do.SchemaReplicant().SchemaMetaVersion() {
   107  			logutil.BgLogger().Info("do not uFIDelate self schemaReplicant version to etcd",
   108  				zap.Int64("usedSchemaVersion", usedSchemaVersion),
   109  				zap.Int64("neededSchemaVersion", neededSchemaVersion), zap.Error(err))
   110  			return
   111  		}
   112  
   113  		err = do.dbs.SchemaSyncer().UFIDelateSelfVersion(context.Background(), neededSchemaVersion)
   114  		if err != nil {
   115  			logutil.BgLogger().Info("uFIDelate self version failed",
   116  				zap.Int64("usedSchemaVersion", usedSchemaVersion),
   117  				zap.Int64("neededSchemaVersion", neededSchemaVersion), zap.Error(err))
   118  		}
   119  	}()
   120  
   121  	startTime := time.Now()
   122  	ok, relatedChanges, err := do.tryLoadSchemaDiffs(m, usedSchemaVersion, neededSchemaVersion)
   123  	if err != nil {
   124  		// We can fall back to a full load, so we don't need to return the error.
   125  		logutil.BgLogger().Error("failed to load schemaReplicant diff", zap.Error(err))
   126  	}
   127  	if ok {
   128  		logutil.BgLogger().Info("diff load SchemaReplicant success",
   129  			zap.Int64("usedSchemaVersion", usedSchemaVersion),
   130  			zap.Int64("neededSchemaVersion", neededSchemaVersion),
   131  			zap.Duration("start time", time.Since(startTime)),
   132  			zap.Int64s("phyTblIDs", relatedChanges.PhyTblIDS),
   133  			zap.Uint64s("actionTypes", relatedChanges.CausetActionTypes))
   134  		return neededSchemaVersion, relatedChanges, fullLoad, nil
   135  	}
   136  
   137  	fullLoad = true
   138  	schemas, err := do.fetchAllSchemasWithBlocks(m)
   139  	if err != nil {
   140  		return 0, nil, fullLoad, err
   141  	}
   142  
   143  	newISBuilder, err := schemareplicant.NewBuilder(handle).InitWithDBInfos(schemas, neededSchemaVersion)
   144  	if err != nil {
   145  		return 0, nil, fullLoad, err
   146  	}
   147  	logutil.BgLogger().Info("full load SchemaReplicant success",
   148  		zap.Int64("usedSchemaVersion", usedSchemaVersion),
   149  		zap.Int64("neededSchemaVersion", neededSchemaVersion),
   150  		zap.Duration("start time", time.Since(startTime)))
   151  	newISBuilder.Build()
   152  	return neededSchemaVersion, nil, fullLoad, nil
   153  }
   154  
   155  func (do *Petri) fetchAllSchemasWithBlocks(m *spacetime.Meta) ([]*perceptron.DBInfo, error) {
   156  	allSchemas, err := m.ListDatabases()
   157  	if err != nil {
   158  		return nil, err
   159  	}
   160  	splittedSchemas := do.splitForConcurrentFetch(allSchemas)
   161  	doneCh := make(chan error, len(splittedSchemas))
   162  	for _, schemas := range splittedSchemas {
   163  		go do.fetchSchemasWithBlocks(schemas, m, doneCh)
   164  	}
   165  	for range splittedSchemas {
   166  		err = <-doneCh
   167  		if err != nil {
   168  			return nil, err
   169  		}
   170  	}
   171  	return allSchemas, nil
   172  }
   173  
   174  // fetchSchemaConcurrency controls the number of goroutines used to load schemas. More goroutines
   175  // increase the memory usage of json.Unmarshal(), which could cause OOM,
   176  // so we keep the concurrency low.
   177  const fetchSchemaConcurrency = 1
   178  
   179  func (do *Petri) splitForConcurrentFetch(schemas []*perceptron.DBInfo) [][]*perceptron.DBInfo {
   180  	groupSize := (len(schemas) + fetchSchemaConcurrency - 1) / fetchSchemaConcurrency
   181  	splitted := make([][]*perceptron.DBInfo, 0, fetchSchemaConcurrency)
   182  	schemaCnt := len(schemas)
   183  	for i := 0; i < schemaCnt; i += groupSize {
   184  		end := i + groupSize
   185  		if end > schemaCnt {
   186  			end = schemaCnt
   187  		}
   188  		splitted = append(splitted, schemas[i:end])
   189  	}
   190  	return splitted
   191  }
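        // Illustrative note (added, not in the original source): with fetchSchemaConcurrency = 1,
        // groupSize equals len(schemas), so every schemaReplicant lands in one group. If the constant
        // were raised to 2, five schemas would split into groups of 3 and 2, since
        // groupSize = (5 + 2 - 1) / 2 = 3.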
   192  
   193  func (do *Petri) fetchSchemasWithBlocks(schemas []*perceptron.DBInfo, m *spacetime.Meta, done chan error) {
   194  	for _, di := range schemas {
   195  		if di.State != perceptron.StatePublic {
   196  			// schemaReplicant is not public, can't be used outside.
   197  			continue
   198  		}
   199  		blocks, err := m.ListBlocks(di.ID)
   200  		if err != nil {
   201  			done <- err
   202  			return
   203  		}
   204  		// If TreatOldVersionUTF8AsUTF8MB4 is enabled, we need to convert the old-version schemaReplicant UTF8 charset to UTF8MB4.
   205  		if config.GetGlobalConfig().TreatOldVersionUTF8AsUTF8MB4 {
   206  			for _, tbInfo := range blocks {
   207  				schemareplicant.ConvertOldVersionUTF8ToUTF8MB4IfNeed(tbInfo)
   208  			}
   209  		}
   210  		di.Blocks = make([]*perceptron.BlockInfo, 0, len(blocks))
   211  		for _, tbl := range blocks {
   212  			if tbl.State != perceptron.StatePublic {
   213  				// block is not public, can't be used outside.
   214  				continue
   215  			}
   216  			schemareplicant.ConvertCharsetDefCauslateToLowerCaseIfNeed(tbl)
   217  			// Check whether the causet is in repair mode.
   218  			if petriutil.RepairInfo.InRepairMode() && petriutil.RepairInfo.CheckAndFetchRepairedBlock(di, tbl) {
   219  				continue
   220  			}
   221  			di.Blocks = append(di.Blocks, tbl)
   222  		}
   223  	}
   224  	done <- nil
   225  }
   226  
   227  const (
   228  	initialVersion         = 0
   229  	maxNumberOfDiffsToLoad = 100
   230  )
   231  
   232  func isTooOldSchema(usedVersion, newVersion int64) bool {
   233  	if usedVersion == initialVersion || newVersion-usedVersion > maxNumberOfDiffsToLoad {
   234  		return true
   235  	}
   236  	return false
   237  }
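        // Worked examples (illustrative, assuming maxNumberOfDiffsToLoad = 100 as defined above):
        //
        //	isTooOldSchema(0, 5)     // true:  usedVersion is initialVersion, so a full load is required
        //	isTooOldSchema(400, 450) // false: 50 diffs, diff loading is attempted
        //	isTooOldSchema(400, 520) // true:  120 diffs exceed the limit, so a full load is required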
   238  
   239  // tryLoadSchemaDiffs tries to load only the latest schemaReplicant changes.
   240  // It returns true if the schemaReplicant is loaded successfully.
   241  // It returns false if the schemaReplicant can not be loaded by schemaReplicant diff, in which case a full load is needed.
   242  // The second returned value contains the causet and partition IDs uFIDelated by the diffs.
   243  func (do *Petri) tryLoadSchemaDiffs(m *spacetime.Meta, usedVersion, newVersion int64) (bool, *einsteindb.RelatedSchemaChange, error) {
   244  	// If there isn't any used version, or used version is too old, we do full load.
   245  	// And when users use history read feature, we will set usedVersion to initialVersion, then full load is needed.
   246  	if isTooOldSchema(usedVersion, newVersion) {
   247  		return false, nil, nil
   248  	}
   249  	var diffs []*perceptron.SchemaDiff
   250  	for usedVersion < newVersion {
   251  		usedVersion++
   252  		diff, err := m.GetSchemaDiff(usedVersion)
   253  		if err != nil {
   254  			return false, nil, err
   255  		}
   256  		if diff == nil {
   257  			// If diff is missing for any version between used and new version, we fall back to full reload.
   258  			return false, nil, nil
   259  		}
   260  		diffs = append(diffs, diff)
   261  	}
   262  	builder := schemareplicant.NewBuilder(do.infoHandle).InitWithOldSchemaReplicant()
   263  	phyTblIDs := make([]int64, 0, len(diffs))
   264  	actions := make([]uint64, 0, len(diffs))
   265  	for _, diff := range diffs {
   266  		IDs, err := builder.ApplyDiff(m, diff)
   267  		if err != nil {
   268  			return false, nil, err
   269  		}
   270  		if canSkipSchemaCheckerDBS(diff.Type) {
   271  			continue
   272  		}
   273  		phyTblIDs = append(phyTblIDs, IDs...)
   274  		for i := 0; i < len(IDs); i++ {
   275  			actions = append(actions, uint64(1<<diff.Type))
   276  		}
   277  	}
   278  	builder.Build()
   279  	relatedChange := einsteindb.RelatedSchemaChange{}
   280  	relatedChange.PhyTblIDS = phyTblIDs
   281  	relatedChange.CausetActionTypes = actions
   282  	return true, &relatedChange, nil
   283  }
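        // Note on the action encoding above (illustrative, added comment): each entry appended to
        // CausetActionTypes is the bitmask uint64(1 << diff.Type), so a diff whose Type is 3 is
        // recorded as 1<<3 = 8, presumably so consumers can test for an action type with a bitwise AND.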
   284  
   285  func canSkipSchemaCheckerDBS(tp perceptron.CausetActionType) bool {
   286  	switch tp {
   287  	case perceptron.CausetActionUFIDelateTiFlashReplicaStatus, perceptron.CausetActionSetTiFlashReplica:
   288  		return true
   289  	}
   290  	return false
   291  }
   292  
   293  // SchemaReplicant gets information schemaReplicant from petri.
   294  func (do *Petri) SchemaReplicant() schemareplicant.SchemaReplicant {
   295  	return do.infoHandle.Get()
   296  }
   297  
   298  // GetSnapshotSchemaReplicant gets a snapshot information schemaReplicant.
   299  func (do *Petri) GetSnapshotSchemaReplicant(snapshotTS uint64) (schemareplicant.SchemaReplicant, error) {
   300  	snapHandle := do.infoHandle.EmptyClone()
   301  	// For the snapHandle, it's an empty Handle, so its usedSchemaVersion is initialVersion.
   302  	_, _, _, err := do.loadSchemaReplicant(snapHandle, initialVersion, snapshotTS)
   303  	if err != nil {
   304  		return nil, err
   305  	}
   306  	return snapHandle.Get(), nil
   307  }
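        // Usage sketch (illustrative; snapshotTS is a caller-supplied timestamp and error handling
        // is elided):
        //
        //	is, err := do.GetSnapshotSchemaReplicant(snapshotTS)
        //	if err != nil {
        //		return err
        //	}
        //	_ = is.SchemaMetaVersion() // inspect the schemaReplicant as of snapshotTS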
   308  
   309  // GetSnapshotMeta gets a new snapshot spacetime at startTS.
   310  func (do *Petri) GetSnapshotMeta(startTS uint64) (*spacetime.Meta, error) {
   311  	snapshot, err := do.causetstore.GetSnapshot(ekv.NewVersion(startTS))
   312  	if err != nil {
   313  		return nil, err
   314  	}
   315  	return spacetime.NewSnapshotMeta(snapshot), nil
   316  }
   317  
   318  // DBS gets DBS from petri.
   319  func (do *Petri) DBS() dbs.DBS {
   320  	return do.dbs
   321  }
   322  
   323  // InfoSyncer gets infoSyncer from petri.
   324  func (do *Petri) InfoSyncer() *infosync.InfoSyncer {
   325  	return do.info
   326  }
   327  
   328  // CausetStore gets KV causetstore from petri.
   329  func (do *Petri) CausetStore() ekv.CausetStorage {
   330  	return do.causetstore
   331  }
   332  
   333  // GetScope gets the status variables scope.
   334  func (do *Petri) GetScope(status string) variable.ScopeFlag {
   335  	// For now, all petri status variables use the default scope.
   336  	return variable.DefaultStatusVarScopeFlag
   337  }
   338  
   339  // Reload reloads SchemaReplicant.
   340  // It's exported for testing.
   341  func (do *Petri) Reload() error {
   342  	failpoint.Inject("ErrorMockReloadFailed", func(val failpoint.Value) {
   343  		if val.(bool) {
   344  			failpoint.Return(errors.New("mock reload failed"))
   345  		}
   346  	})
   347  
   348  	// Lock to ensure that only one reload runs at a time.
   349  	do.m.Lock()
   350  	defer do.m.Unlock()
   351  
   352  	startTime := time.Now()
   353  
   354  	var err error
   355  	var neededSchemaVersion int64
   356  
   357  	ver, err := do.causetstore.CurrentVersion()
   358  	if err != nil {
   359  		return err
   360  	}
   361  
   362  	schemaVersion := int64(0)
   363  	oldSchemaReplicant := do.infoHandle.Get()
   364  	if oldSchemaReplicant != nil {
   365  		schemaVersion = oldSchemaReplicant.SchemaMetaVersion()
   366  	}
   367  
   368  	var (
   369  		fullLoad       bool
   370  		relatedChanges *einsteindb.RelatedSchemaChange
   371  	)
   372  	neededSchemaVersion, relatedChanges, fullLoad, err = do.loadSchemaReplicant(do.infoHandle, schemaVersion, ver.Ver)
   373  	metrics.LoadSchemaDuration.Observe(time.Since(startTime).Seconds())
   374  	if err != nil {
   375  		metrics.LoadSchemaCounter.WithLabelValues("failed").Inc()
   376  		return err
   377  	}
   378  	metrics.LoadSchemaCounter.WithLabelValues("succ").Inc()
   379  
   380  	if fullLoad {
   381  		logutil.BgLogger().Info("full load and reset schemaReplicant validator")
   382  		do.SchemaValidator.Reset()
   383  	}
   384  	do.SchemaValidator.UFIDelate(ver.Ver, schemaVersion, neededSchemaVersion, relatedChanges)
   385  
   386  	lease := do.DBS().GetLease()
   387  	sub := time.Since(startTime)
   388  	// The reload interval is lease / 2. If loading the schemaReplicant takes longer than this interval,
   389  	// some queries may fail with the ErrSchemaReplicantExpired error.
   390  	if sub > (lease/2) && lease > 0 {
   391  		logutil.BgLogger().Warn("loading schemaReplicant takes a long time", zap.Duration("take time", sub))
   392  	}
   393  
   394  	return nil
   395  }
   396  
   397  // LogSlowQuery keeps topN recent slow queries in petri.
   398  func (do *Petri) LogSlowQuery(query *SlowQueryInfo) {
   399  	do.slowQuery.mu.RLock()
   400  	defer do.slowQuery.mu.RUnlock()
   401  	if do.slowQuery.mu.closed {
   402  		return
   403  	}
   404  
   405  	select {
   406  	case do.slowQuery.ch <- query:
   407  	default:
   408  	}
   409  }
   410  
   411  // ShowSlowQuery returns the slow queries.
   412  func (do *Petri) ShowSlowQuery(showSlow *ast.ShowSlow) []*SlowQueryInfo {
   413  	msg := &showSlowMessage{
   414  		request: showSlow,
   415  	}
   416  	msg.Add(1)
   417  	do.slowQuery.msgCh <- msg
   418  	msg.Wait()
   419  	return msg.result
   420  }
   421  
   422  func (do *Petri) topNSlowQueryLoop() {
   423  	defer soliton.Recover(metrics.LabelPetri, "topNSlowQueryLoop", nil, false)
   424  	ticker := time.NewTicker(time.Minute * 10)
   425  	defer func() {
   426  		ticker.Stop()
   427  		do.wg.Done()
   428  		logutil.BgLogger().Info("topNSlowQueryLoop exited.")
   429  	}()
   430  	for {
   431  		select {
   432  		case now := <-ticker.C:
   433  			do.slowQuery.RemoveExpired(now)
   434  		case info, ok := <-do.slowQuery.ch:
   435  			if !ok {
   436  				return
   437  			}
   438  			do.slowQuery.Append(info)
   439  		case msg := <-do.slowQuery.msgCh:
   440  			req := msg.request
   441  			switch req.Tp {
   442  			case ast.ShowSlowTop:
   443  				msg.result = do.slowQuery.QueryTop(int(req.Count), req.HoTT)
   444  			case ast.ShowSlowRecent:
   445  				msg.result = do.slowQuery.QueryRecent(int(req.Count))
   446  			default:
   447  				msg.result = do.slowQuery.QueryAll()
   448  			}
   449  			msg.Done()
   450  		}
   451  	}
   452  }
   453  
   454  func (do *Petri) infoSyncerKeeper() {
   455  	defer func() {
   456  		do.wg.Done()
   457  		logutil.BgLogger().Info("infoSyncerKeeper exited.")
   458  		soliton.Recover(metrics.LabelPetri, "infoSyncerKeeper", nil, false)
   459  	}()
   460  	ticker := time.NewTicker(infosync.ReportInterval)
   461  	defer ticker.Stop()
   462  	for {
   463  		select {
   464  		case <-ticker.C:
   465  			do.info.ReportMinStartTS(do.CausetStore())
   466  		case <-do.info.Done():
   467  			logutil.BgLogger().Info("server info syncer need to restart")
   468  			if err := do.info.Restart(context.Background()); err != nil {
   469  				logutil.BgLogger().Error("server info syncer restart failed", zap.Error(err))
   470  			} else {
   471  				logutil.BgLogger().Info("server info syncer restarted")
   472  			}
   473  		case <-do.exit:
   474  			return
   475  		}
   476  	}
   477  }
   478  
   479  func (do *Petri) topologySyncerKeeper() {
   480  	defer soliton.Recover(metrics.LabelPetri, "topologySyncerKeeper", nil, false)
   481  	ticker := time.NewTicker(infosync.TopologyTimeToRefresh)
   482  	defer func() {
   483  		ticker.Stop()
   484  		do.wg.Done()
   485  		logutil.BgLogger().Info("topologySyncerKeeper exited.")
   486  	}()
   487  
   488  	for {
   489  		select {
   490  		case <-ticker.C:
   491  			err := do.info.StoreTopologyInfo(context.Background())
   492  			if err != nil {
   493  				logutil.BgLogger().Error("refresh topology in loop failed", zap.Error(err))
   494  			}
   495  		case <-do.info.TopologyDone():
   496  			logutil.BgLogger().Info("server topology syncer need to restart")
   497  			if err := do.info.RestartTopology(context.Background()); err != nil {
   498  				logutil.BgLogger().Error("server topology syncer restart failed", zap.Error(err))
   499  			} else {
   500  				logutil.BgLogger().Info("server topology syncer restarted")
   501  			}
   502  		case <-do.exit:
   503  			return
   504  		}
   505  	}
   506  }
   507  
   508  func (do *Petri) loadSchemaInLoop(ctx context.Context, lease time.Duration) {
   509  	defer soliton.Recover(metrics.LabelPetri, "loadSchemaInLoop", nil, true)
   510  	// Lease renewal can run at any frequency.
   511  	// Use lease/2 here as recommended by the paper.
   512  	ticker := time.NewTicker(lease / 2)
   513  	defer func() {
   514  		ticker.Stop()
   515  		do.wg.Done()
   516  		logutil.BgLogger().Info("loadSchemaInLoop exited.")
   517  	}()
   518  	syncer := do.dbs.SchemaSyncer()
   519  
   520  	for {
   521  		select {
   522  		case <-ticker.C:
   523  			err := do.Reload()
   524  			if err != nil {
   525  				logutil.BgLogger().Error("reload schemaReplicant in loop failed", zap.Error(err))
   526  			}
   527  		case _, ok := <-syncer.GlobalVersionCh():
   528  			err := do.Reload()
   529  			if err != nil {
   530  				logutil.BgLogger().Error("reload schemaReplicant in loop failed", zap.Error(err))
   531  			}
   532  			if !ok {
   533  				logutil.BgLogger().Warn("reload schemaReplicant in loop, schemaReplicant syncer need rewatch")
   534  				// Make sure the rewatch doesn't affect load schemaReplicant, so we watch the global schemaReplicant version asynchronously.
   535  				syncer.WatchGlobalSchemaVer(context.Background())
   536  			}
   537  		case <-syncer.Done():
   538  			// The schemaReplicant syncer stopped, so we need to stop the schemaReplicant validator to keep the schemaReplicant version in sync.
   539  			logutil.BgLogger().Info("reload schemaReplicant in loop, schemaReplicant syncer need restart")
   540  			// etcd is responsible for schemaReplicant synchronization. We should ensure there are at most two different schemaReplicant versions
   541  			// in the MilevaDB cluster so that data and schemaReplicant stay consistent. If we lose the connection/stochastik to etcd, the cluster
   542  			// will treat this MilevaDB as a down instance, and etcd will remove the key `/milevadb/dbs/all_schema_versions/milevadb-id`.
   543  			// Say the schemaReplicant version is now 1 and the tenant is changing it to 2; it will not wait for this down MilevaDB to sync the schemaReplicant,
   544  			// and will continue to change the schemaReplicant to version 3. Unfortunately, this down MilevaDB will still be at schemaReplicant version 1,
   545  			// which is not consistent with version 3. So we need to stop the schemaReplicant validator to prohibit DML execution.
   546  			do.SchemaValidator.Stop()
   547  			err := do.mustRestartSyncer(ctx)
   548  			if err != nil {
   549  				logutil.BgLogger().Error("reload schemaReplicant in loop, schemaReplicant syncer restart failed", zap.Error(err))
   550  				break
   551  			}
   552  			// The schemaReplicant may have changed, so we must reload it before the schemaReplicant validator can restart.
   553  			exitLoop := do.mustReload()
   554  			// The petri is closed.
   555  			if exitLoop {
   556  				logutil.BgLogger().Error("petri is closed, exit loadSchemaInLoop")
   557  				return
   558  			}
   559  			do.SchemaValidator.Restart()
   560  			logutil.BgLogger().Info("schemaReplicant syncer restarted")
   561  		case <-do.exit:
   562  			return
   563  		}
   564  	}
   565  }
   566  
   567  // mustRestartSyncer tries to restart the SchemaSyncer.
   568  // It does not return until the restart succeeds or the petri is stopped.
   569  func (do *Petri) mustRestartSyncer(ctx context.Context) error {
   570  	syncer := do.dbs.SchemaSyncer()
   571  
   572  	for {
   573  		err := syncer.Restart(ctx)
   574  		if err == nil {
   575  			return nil
   576  		}
   577  		// If the petri has stopped, we return an error immediately.
   578  		if do.isClose() {
   579  			return err
   580  		}
   581  		logutil.BgLogger().Error("restart the schemaReplicant syncer failed", zap.Error(err))
   582  		time.Sleep(time.Second)
   583  	}
   584  }
   585  
   586  // mustReload tries to Reload the schemaReplicant. It does not return until the reload succeeds or the petri is closed.
   587  // It returns false on success and true when the petri is closed.
   588  func (do *Petri) mustReload() (exitLoop bool) {
   589  	for {
   590  		err := do.Reload()
   591  		if err == nil {
   592  			logutil.BgLogger().Info("mustReload succeed")
   593  			return false
   594  		}
   595  
   596  		// If the petri is closed, we return immediately.
   597  		logutil.BgLogger().Info("reload the schemaReplicant failed", zap.Error(err))
   598  		if do.isClose() {
   599  			return true
   600  		}
   601  		time.Sleep(200 * time.Millisecond)
   602  	}
   603  }
   604  
   605  func (do *Petri) isClose() bool {
   606  	select {
   607  	case <-do.exit:
   608  		logutil.BgLogger().Info("petri is closed")
   609  		return true
   610  	default:
   611  	}
   612  	return false
   613  }
   614  
   615  // Close closes the Petri and releases its resources.
   616  func (do *Petri) Close() {
   617  	if do == nil {
   618  		return
   619  	}
   620  	startTime := time.Now()
   621  	if do.dbs != nil {
   622  		terror.Log(do.dbs.Stop())
   623  	}
   624  	if do.info != nil {
   625  		do.info.RemoveServerInfo()
   626  		do.info.RemoveMinStartTS()
   627  	}
   628  	close(do.exit)
   629  	if do.etcdClient != nil {
   630  		terror.Log(errors.Trace(do.etcdClient.Close()))
   631  	}
   632  
   633  	do.sysStochastikPool.Close()
   634  	do.slowQuery.Close()
   635  	do.cancel()
   636  	do.wg.Wait()
   637  	logutil.BgLogger().Info("petri closed", zap.Duration("take time", time.Since(startTime)))
   638  }
   639  
   640  type dbsCallback struct {
   641  	dbs.BaseCallback
   642  	do *Petri
   643  }
   644  
   645  func (c *dbsCallback) OnChanged(err error) error {
   646  	if err != nil {
   647  		return err
   648  	}
   649  	logutil.BgLogger().Info("performing DBS change, must reload")
   650  
   651  	err = c.do.Reload()
   652  	if err != nil {
   653  		logutil.BgLogger().Error("performing DBS change failed", zap.Error(err))
   654  	}
   655  
   656  	return nil
   657  }
   658  
   659  const resourceIdleTimeout = 3 * time.Minute // resources in the ResourcePool will be recycled after idleTimeout
   660  
   661  // NewPetri creates a new petri. Should not create multiple petris for the same causetstore.
   662  func NewPetri(causetstore ekv.CausetStorage, dbsLease time.Duration, statsLease time.Duration, idxUsageSyncLease time.Duration, factory pools.Factory) *Petri {
   663  	capacity := 200 // capacity of the sysStochastikPool
   664  	do := &Petri{
   665  		causetstore:         causetstore,
   666  		exit:                make(chan struct{}),
   667  		sysStochastikPool:   newStochastikPool(capacity, factory),
   668  		statsLease:          statsLease,
   669  		infoHandle:          schemareplicant.NewHandle(causetstore),
   670  		slowQuery:           newTopNSlowQueries(30, time.Hour*24*7, 500),
   671  		indexUsageSyncLease: idxUsageSyncLease,
   672  	}
   673  
   674  	do.SchemaValidator = NewSchemaValidator(dbsLease, do)
   675  	return do
   676  }
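        // Minimal lifecycle sketch (illustrative; causetstore, factory and sysFactory are
        // placeholders supplied by the caller, not values defined in this file):
        //
        //	do := NewPetri(causetstore, time.Minute, 3*time.Second, 0, factory)
        //	if err := do.Init(time.Minute, sysFactory); err != nil {
        //		// handle the error
        //	}
        //	defer do.Close()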
   677  
   678  // Init initializes a petri.
   679  func (do *Petri) Init(dbsLease time.Duration, sysFactory func(*Petri) (pools.Resource, error)) error {
   680  	perfschema.Init()
   681  	if ebd, ok := do.causetstore.(einsteindb.EtcdBackend); ok {
   682  		var addrs []string
   683  		var err error
   684  		if addrs, err = ebd.EtcdAddrs(); err != nil {
   685  			return err
   686  		}
   687  		if addrs != nil {
   688  			cfg := config.GetGlobalConfig()
   689  			// Silence the etcd warn log so that, when the petri is closed, it won't randomly print warn logs;
   690  			// see details at the issue https://github.com/whtcorpsinc/milevadb/issues/15479
   691  			etcdLogCfg := zap.NewProductionConfig()
   692  			etcdLogCfg.Level = zap.NewAtomicLevelAt(zap.ErrorLevel)
   693  			cli, err := clientv3.New(clientv3.Config{
   694  				LogConfig:        &etcdLogCfg,
   695  				Endpoints:        addrs,
   696  				AutoSyncInterval: 30 * time.Second,
   697  				DialTimeout:      5 * time.Second,
   698  				DialOptions: []grpc.DialOption{
   699  					grpc.WithBackoffMaxDelay(time.Second * 3),
   700  					grpc.WithKeepaliveParams(keepalive.ClientParameters{
   701  						Time:    time.Duration(cfg.EinsteinDBClient.GrpcKeepAliveTime) * time.Second,
   702  						Timeout: time.Duration(cfg.EinsteinDBClient.GrpcKeepAliveTimeout) * time.Second,
   703  					}),
   704  				},
   705  				TLS: ebd.TLSConfig(),
   706  			})
   707  			if err != nil {
   708  				return errors.Trace(err)
   709  			}
   710  			do.etcdClient = cli
   711  		}
   712  	}
   713  
   714  	// TODO: Here we create a new stochastik with sysFac in DBS,
   715  	// which will use `do` as the Petri instead of calling `domap.Get`.
   716  	// That's because `domap.Get` requires a dagger, but before
   717  	// we finish initializing the Petri, we can't acquire it again.
   718  	// After we remove the lazy logic of creating the Petri, we
   719  	// can simplify the code here.
   720  	sysFac := func() (pools.Resource, error) {
   721  		return sysFactory(do)
   722  	}
   723  	sysCtxPool := pools.NewResourcePool(sysFac, 2, 2, resourceIdleTimeout)
   724  	ctx, cancelFunc := context.WithCancel(context.Background())
   725  	do.cancel = cancelFunc
   726  	callback := &dbsCallback{do: do}
   727  	d := do.dbs
   728  	do.dbs = dbs.NewDBS(
   729  		ctx,
   730  		dbs.WithEtcdClient(do.etcdClient),
   731  		dbs.WithStore(do.causetstore),
   732  		dbs.WithInfoHandle(do.infoHandle),
   733  		dbs.WithHook(callback),
   734  		dbs.WithLease(dbsLease),
   735  	)
   736  	err := do.dbs.Start(sysCtxPool)
   737  	if err != nil {
   738  		return err
   739  	}
   740  	failpoint.Inject("MockReplaceDBS", func(val failpoint.Value) {
   741  		if val.(bool) {
   742  			if err := do.dbs.Stop(); err != nil {
   743  				logutil.BgLogger().Error("stop DBS failed", zap.Error(err))
   744  			}
   745  			do.dbs = d
   746  		}
   747  	})
   748  
   749  	skipRegisterToDashboard := config.GetGlobalConfig().SkipRegisterToDashboard
   750  	err = do.dbs.SchemaSyncer().Init(ctx)
   751  	if err != nil {
   752  		return err
   753  	}
   754  	do.info, err = infosync.GlobalInfoSyncerInit(ctx, do.dbs.GetID(), do.etcdClient, skipRegisterToDashboard)
   755  	if err != nil {
   756  		return err
   757  	}
   758  	err = do.Reload()
   759  	if err != nil {
   760  		return err
   761  	}
   762  
   763  	// The lease value is 0 only when the causetstore is local.
   764  	// A local causetstore doesn't need loadSchemaInLoop.
   765  	if dbsLease > 0 {
   766  		do.wg.Add(1)
   767  		// Local causetstore needs to get the change information for every DBS state in each stochastik.
   768  		go do.loadSchemaInLoop(ctx, dbsLease)
   769  	}
   770  	do.wg.Add(1)
   771  	go do.topNSlowQueryLoop()
   772  
   773  	do.wg.Add(1)
   774  	go do.infoSyncerKeeper()
   775  
   776  	if !skipRegisterToDashboard {
   777  		do.wg.Add(1)
   778  		go do.topologySyncerKeeper()
   779  	}
   780  
   781  	return nil
   782  }
   783  
   784  type stochastikPool struct {
   785  	resources chan pools.Resource
   786  	factory   pools.Factory
   787  	mu        struct {
   788  		sync.RWMutex
   789  		closed bool
   790  	}
   791  }
   792  
   793  func newStochastikPool(cap int, factory pools.Factory) *stochastikPool {
   794  	return &stochastikPool{
   795  		resources: make(chan pools.Resource, cap),
   796  		factory:   factory,
   797  	}
   798  }
   799  
   800  func (p *stochastikPool) Get() (resource pools.Resource, err error) {
   801  	var ok bool
   802  	select {
   803  	case resource, ok = <-p.resources:
   804  		if !ok {
   805  			err = errors.New("stochastik pool closed")
   806  		}
   807  	default:
   808  		resource, err = p.factory()
   809  	}
   810  	return
   811  }
   812  
   813  func (p *stochastikPool) Put(resource pools.Resource) {
   814  	p.mu.RLock()
   815  	defer p.mu.RUnlock()
   816  	if p.mu.closed {
   817  		resource.Close()
   818  		return
   819  	}
   820  
   821  	select {
   822  	case p.resources <- resource:
   823  	default:
   824  		resource.Close()
   825  	}
   826  }
   827  func (p *stochastikPool) Close() {
   828  	p.mu.Lock()
   829  	if p.mu.closed {
   830  		p.mu.Unlock()
   831  		return
   832  	}
   833  	p.mu.closed = true
   834  	close(p.resources)
   835  	p.mu.Unlock()
   836  
   837  	for r := range p.resources {
   838  		r.Close()
   839  	}
   840  }
   841  
   842  // SysStochastikPool returns the system stochastik pool.
   843  func (do *Petri) SysStochastikPool() *stochastikPool {
   844  	return do.sysStochastikPool
   845  }
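        // Usage sketch (illustrative; error handling elided). A resource taken from the pool
        // should be returned with Put when the caller is done, otherwise Get falls back to the
        // factory and the pooled capacity goes unused:
        //
        //	se, err := do.SysStochastikPool().Get()
        //	if err != nil {
        //		return err
        //	}
        //	defer do.SysStochastikPool().Put(se)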
   846  
   847  // GetEtcdClient returns the etcd client.
   848  func (do *Petri) GetEtcdClient() *clientv3.Client {
   849  	return do.etcdClient
   850  }
   851  
   852  // LoadPrivilegeLoop creates a goroutine that loads privilege blocks in a loop. It
   853  // should be called only once in BootstrapStochastik.
   854  func (do *Petri) LoadPrivilegeLoop(ctx stochastikctx.Context) error {
   855  	ctx.GetStochastikVars().InRestrictedALLEGROSQL = true
   856  	do.privHandle = privileges.NewHandle()
   857  	err := do.privHandle.UFIDelate(ctx)
   858  	if err != nil {
   859  		return err
   860  	}
   861  
   862  	var watchCh clientv3.WatchChan
   863  	duration := 5 * time.Minute
   864  	if do.etcdClient != nil {
   865  		watchCh = do.etcdClient.Watch(context.Background(), privilegeKey)
   866  		duration = 10 * time.Minute
   867  	}
   868  
   869  	do.wg.Add(1)
   870  	go func() {
   871  		defer func() {
   872  			do.wg.Done()
   873  			logutil.BgLogger().Info("loadPrivilegeInLoop exited.")
   874  			soliton.Recover(metrics.LabelPetri, "loadPrivilegeInLoop", nil, false)
   875  		}()
   876  		var count int
   877  		for {
   878  			ok := true
   879  			select {
   880  			case <-do.exit:
   881  				return
   882  			case _, ok = <-watchCh:
   883  			case <-time.After(duration):
   884  			}
   885  			if !ok {
   886  				logutil.BgLogger().Error("load privilege loop watch channel closed")
   887  				watchCh = do.etcdClient.Watch(context.Background(), privilegeKey)
   888  				count++
   889  				if count > 10 {
   890  					time.Sleep(time.Duration(count) * time.Second)
   891  				}
   892  				continue
   893  			}
   894  
   895  			count = 0
   896  			err := do.privHandle.UFIDelate(ctx)
   897  			metrics.LoadPrivilegeCounter.WithLabelValues(metrics.RetLabel(err)).Inc()
   898  			if err != nil {
   899  				logutil.BgLogger().Error("load privilege failed", zap.Error(err))
   900  			}
   901  		}
   902  	}()
   903  	return nil
   904  }
   905  
   906  // PrivilegeHandle returns the MyALLEGROSQLPrivilege.
   907  func (do *Petri) PrivilegeHandle() *privileges.Handle {
   908  	return do.privHandle
   909  }
   910  
   911  // BindHandle returns petri's bindHandle.
   912  func (do *Petri) BindHandle() *bindinfo.BindHandle {
   913  	return do.bindHandle
   914  }
   915  
   916  // LoadBindInfoLoop creates a goroutine that loads BindInfo in a loop. It should
   917  // be called only once in BootstrapStochastik.
   918  func (do *Petri) LoadBindInfoLoop(ctxForHandle stochastikctx.Context, ctxForEvolve stochastikctx.Context) error {
   919  	ctxForHandle.GetStochastikVars().InRestrictedALLEGROSQL = true
   920  	ctxForEvolve.GetStochastikVars().InRestrictedALLEGROSQL = true
   921  	do.bindHandle = bindinfo.NewBindHandle(ctxForHandle)
   922  	err := do.bindHandle.UFIDelate(true)
   923  	if err != nil || bindinfo.Lease == 0 {
   924  		return err
   925  	}
   926  
   927  	do.globalBindHandleWorkerLoop()
   928  	do.handleEvolveCausetTasksLoop(ctxForEvolve)
   929  	return nil
   930  }
   931  
   932  func (do *Petri) globalBindHandleWorkerLoop() {
   933  	do.wg.Add(1)
   934  	go func() {
   935  		defer func() {
   936  			do.wg.Done()
   937  			logutil.BgLogger().Info("globalBindHandleWorkerLoop exited.")
   938  			soliton.Recover(metrics.LabelPetri, "globalBindHandleWorkerLoop", nil, false)
   939  		}()
   940  		bindWorkerTicker := time.NewTicker(bindinfo.Lease)
   941  		defer bindWorkerTicker.Stop()
   942  		for {
   943  			select {
   944  			case <-do.exit:
   945  				return
   946  			case <-bindWorkerTicker.C:
   947  				err := do.bindHandle.UFIDelate(false)
   948  				if err != nil {
   949  					logutil.BgLogger().Error("uFIDelate bindinfo failed", zap.Error(err))
   950  				}
   951  				do.bindHandle.DropInvalidBindRecord()
   952  				if variable.MilevaDBOptOn(variable.CaptureCausetBaseline.GetVal()) {
   953  					do.bindHandle.CaptureBaselines()
   954  				}
   955  				do.bindHandle.SaveEvolveTasksToStore()
   956  			}
   957  		}
   958  	}()
   959  }
   960  
   961  func (do *Petri) handleEvolveCausetTasksLoop(ctx stochastikctx.Context) {
   962  	do.wg.Add(1)
   963  	go func() {
   964  		defer func() {
   965  			do.wg.Done()
   966  			logutil.BgLogger().Info("handleEvolveCausetTasksLoop exited.")
   967  			soliton.Recover(metrics.LabelPetri, "handleEvolveCausetTasksLoop", nil, false)
   968  		}()
   969  		tenant := do.newTenantManager(bindinfo.Prompt, bindinfo.TenantKey)
   970  		for {
   971  			select {
   972  			case <-do.exit:
   973  				tenant.Cancel()
   974  				return
   975  			case <-time.After(bindinfo.Lease):
   976  			}
   977  			if tenant.IsTenant() {
   978  				err := do.bindHandle.HandleEvolveCausetTask(ctx, false)
   979  				if err != nil {
   980  					logutil.BgLogger().Info("evolve plan failed", zap.Error(err))
   981  				}
   982  			}
   983  		}
   984  	}()
   985  }
   986  
   987  // TelemetryLoop creates a goroutine that reports usage data in a loop. It should be called only once
   988  // in BootstrapStochastik.
   989  func (do *Petri) TelemetryLoop(ctx stochastikctx.Context) {
   990  	ctx.GetStochastikVars().InRestrictedALLEGROSQL = true
   991  	do.wg.Add(1)
   992  	go func() {
   993  		defer func() {
   994  			do.wg.Done()
   995  			logutil.BgLogger().Info("handleTelemetryLoop exited.")
   996  			soliton.Recover(metrics.LabelPetri, "handleTelemetryLoop", nil, false)
   997  		}()
   998  		tenant := do.newTenantManager(telemetry.Prompt, telemetry.TenantKey)
   999  		for {
  1000  			select {
  1001  			case <-do.exit:
  1002  				tenant.Cancel()
  1003  				return
  1004  			case <-time.After(telemetry.ReportInterval):
  1005  				if !tenant.IsTenant() {
  1006  					continue
  1007  				}
  1008  				err := telemetry.ReportUsageData(ctx, do.GetEtcdClient())
  1009  				if err != nil {
  1010  					// Only status uFIDelate errors will be printed out
  1011  					logutil.BgLogger().Warn("handleTelemetryLoop status uFIDelate failed", zap.Error(err))
  1012  				}
  1013  			}
  1014  		}
  1015  	}()
  1016  }
  1017  
  1018  // StatsHandle returns the statistic handle.
  1019  func (do *Petri) StatsHandle() *handle.Handle {
  1020  	return (*handle.Handle)(atomic.LoadPointer(&do.statsHandle))
  1021  }
  1022  
  1023  // CreateStatsHandle is used only for tests.
  1024  func (do *Petri) CreateStatsHandle(ctx stochastikctx.Context) {
  1025  	atomic.StorePointer(&do.statsHandle, unsafe.Pointer(handle.NewHandle(ctx, do.statsLease)))
  1026  }
  1027  
  1028  // StatsUFIDelating checks if the stats worker is uFIDelating.
  1029  func (do *Petri) StatsUFIDelating() bool {
  1030  	return do.statsUFIDelating.Get() > 0
  1031  }
  1032  
  1033  // SetStatsUFIDelating sets the value of stats uFIDelating.
  1034  func (do *Petri) SetStatsUFIDelating(val bool) {
  1035  	if val {
  1036  		do.statsUFIDelating.Set(1)
  1037  	} else {
  1038  		do.statsUFIDelating.Set(0)
  1039  	}
  1040  }
  1041  
  1042  // RunAutoAnalyze indicates whether this MilevaDB server starts the auto-analyze worker and can run auto-analyze jobs.
  1043  var RunAutoAnalyze = true
  1044  
  1045  // UFIDelateBlockStatsLoop creates a goroutine that loads and uFIDelates stats info in a loop.
  1046  // It will also start a goroutine to analyze blocks automatically.
  1047  // It should be called only once in BootstrapStochastik.
  1048  func (do *Petri) UFIDelateBlockStatsLoop(ctx stochastikctx.Context) error {
  1049  	ctx.GetStochastikVars().InRestrictedALLEGROSQL = true
  1050  	statsHandle := handle.NewHandle(ctx, do.statsLease)
  1051  	atomic.StorePointer(&do.statsHandle, unsafe.Pointer(statsHandle))
  1052  	do.dbs.RegisterEventCh(statsHandle.DBSEventCh())
  1053  	// A negative stats lease indicates a test environment; the uFIDelate loop is not needed.
  1054  	if do.statsLease >= 0 {
  1055  		do.wg.Add(1)
  1056  		go do.loadStatsWorker()
  1057  	}
  1058  	if do.statsLease <= 0 {
  1059  		return nil
  1060  	}
  1061  	tenant := do.newTenantManager(handle.StatsPrompt, handle.StatsTenantKey)
  1062  	do.wg.Add(1)
  1063  	do.SetStatsUFIDelating(true)
  1064  	go do.uFIDelateStatsWorker(ctx, tenant)
  1065  	if RunAutoAnalyze {
  1066  		do.wg.Add(1)
  1067  		go do.autoAnalyzeWorker(tenant)
  1068  	}
  1069  	return nil
  1070  }
  1071  
  1072  func (do *Petri) newTenantManager(prompt, tenantKey string) tenant.Manager {
  1073  	id := do.dbs.TenantManager().ID()
  1074  	var statsTenant tenant.Manager
  1075  	if do.etcdClient == nil {
  1076  		statsTenant = tenant.NewMockManager(context.Background(), id)
  1077  	} else {
  1078  		statsTenant = tenant.NewTenantManager(context.Background(), do.etcdClient, prompt, id, tenantKey)
  1079  	}
  1080  	// TODO: Need to do something when err is not nil.
  1081  	err := statsTenant.CampaignTenant()
  1082  	if err != nil {
  1083  		logutil.BgLogger().Warn("campaign tenant failed", zap.Error(err))
  1084  	}
  1085  	return statsTenant
  1086  }
  1087  
  1088  func (do *Petri) loadStatsWorker() {
  1089  	defer soliton.Recover(metrics.LabelPetri, "loadStatsWorker", nil, false)
  1090  	lease := do.statsLease
  1091  	if lease == 0 {
  1092  		lease = 3 * time.Second
  1093  	}
  1094  	loadTicker := time.NewTicker(lease)
  1095  	defer func() {
  1096  		loadTicker.Stop()
  1097  		do.wg.Done()
  1098  		logutil.BgLogger().Info("loadStatsWorker exited.")
  1099  	}()
  1100  	statsHandle := do.StatsHandle()
  1101  	t := time.Now()
  1102  	err := statsHandle.InitStats(do.SchemaReplicant())
  1103  	if err != nil {
  1104  		logutil.BgLogger().Debug("init stats info failed", zap.Error(err))
  1105  	} else {
  1106  		logutil.BgLogger().Info("init stats info time", zap.Duration("take time", time.Since(t)))
  1107  	}
  1108  	for {
  1109  		select {
  1110  		case <-loadTicker.C:
  1111  			err = statsHandle.UFIDelate(do.SchemaReplicant())
  1112  			if err != nil {
  1113  				logutil.BgLogger().Debug("uFIDelate stats info failed", zap.Error(err))
  1114  			}
  1115  			err = statsHandle.LoadNeededHistograms()
  1116  			if err != nil {
  1117  				logutil.BgLogger().Debug("load histograms failed", zap.Error(err))
  1118  			}
  1119  		case <-do.exit:
  1120  			return
  1121  		}
  1122  	}
  1123  }
  1124  
  1125  func (do *Petri) uFIDelateStatsWorker(ctx stochastikctx.Context, tenant tenant.Manager) {
  1126  	defer soliton.Recover(metrics.LabelPetri, "uFIDelateStatsWorker", nil, false)
  1127  	lease := do.statsLease
  1128  	deltaUFIDelateTicker := time.NewTicker(20 * lease)
  1129  	gcStatsTicker := time.NewTicker(100 * lease)
  1130  	dumpFeedbackTicker := time.NewTicker(200 * lease)
  1131  	loadFeedbackTicker := time.NewTicker(5 * lease)
  1132  	statsHandle := do.StatsHandle()
  1133  	defer func() {
  1134  		loadFeedbackTicker.Stop()
  1135  		dumpFeedbackTicker.Stop()
  1136  		gcStatsTicker.Stop()
  1137  		deltaUFIDelateTicker.Stop()
  1138  		do.SetStatsUFIDelating(false)
  1139  		do.wg.Done()
  1140  		logutil.BgLogger().Info("uFIDelateStatsWorker exited.")
  1141  	}()
  1142  	for {
  1143  		select {
  1144  		case <-do.exit:
  1145  			statsHandle.FlushStats()
  1146  			tenant.Cancel()
  1147  			return
  1148  			// This channel is sent only by dbs tenant.
  1149  		case t := <-statsHandle.DBSEventCh():
  1150  			err := statsHandle.HandleDBSEvent(t)
  1151  			if err != nil {
  1152  				logutil.BgLogger().Debug("handle dbs event failed", zap.Error(err))
  1153  			}
  1154  		case <-deltaUFIDelateTicker.C:
  1155  			err := statsHandle.DumpStatsDeltaToKV(handle.DumFIDelelta)
  1156  			if err != nil {
  1157  				logutil.BgLogger().Debug("dump stats delta failed", zap.Error(err))
  1158  			}
  1159  			statsHandle.UFIDelateErrorRate(do.SchemaReplicant())
  1160  		case <-loadFeedbackTicker.C:
  1161  			statsHandle.UFIDelateStatsByLocalFeedback(do.SchemaReplicant())
  1162  			if !tenant.IsTenant() {
  1163  				continue
  1164  			}
  1165  			err := statsHandle.HandleUFIDelateStats(do.SchemaReplicant())
  1166  			if err != nil {
  1167  				logutil.BgLogger().Debug("uFIDelate stats using feedback failed", zap.Error(err))
  1168  			}
  1169  		case <-dumpFeedbackTicker.C:
  1170  			err := statsHandle.DumpStatsFeedbackToKV()
  1171  			if err != nil {
  1172  				logutil.BgLogger().Debug("dump stats feedback failed", zap.Error(err))
  1173  			}
  1174  		case <-gcStatsTicker.C:
  1175  			if !tenant.IsTenant() {
  1176  				continue
  1177  			}
  1178  			err := statsHandle.GCStats(do.SchemaReplicant(), do.DBS().GetLease())
  1179  			if err != nil {
  1180  				logutil.BgLogger().Debug("GC stats failed", zap.Error(err))
  1181  			}
  1182  		}
  1183  	}
  1184  }
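        // Illustrative cadence (added note, assuming statsLease = 3s; the actual lease is configured
        // by the caller of NewPetri): delta dumps run every 20*3s = 60s, feedback loads every
        // 5*3s = 15s, feedback dumps every 200*3s = 600s, and stats GC every 100*3s = 300s.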
  1185  
  1186  func (do *Petri) autoAnalyzeWorker(tenant tenant.Manager) {
  1187  	defer soliton.Recover(metrics.LabelPetri, "autoAnalyzeWorker", nil, false)
  1188  	statsHandle := do.StatsHandle()
  1189  	analyzeTicker := time.NewTicker(do.statsLease)
  1190  	defer func() {
  1191  		analyzeTicker.Stop()
  1192  		do.wg.Done()
  1193  		logutil.BgLogger().Info("autoAnalyzeWorker exited.")
  1194  	}()
  1195  	for {
  1196  		select {
  1197  		case <-analyzeTicker.C:
  1198  			if tenant.IsTenant() {
  1199  				statsHandle.HandleAutoAnalyze(do.SchemaReplicant())
  1200  			}
  1201  		case <-do.exit:
  1202  			return
  1203  		}
  1204  	}
  1205  }
  1206  
  1207  // ExpensiveQueryHandle returns the expensive query handle.
  1208  func (do *Petri) ExpensiveQueryHandle() *expensivequery.Handle {
  1209  	return do.expensiveQueryHandle
  1210  }
  1211  
  1212  // InitExpensiveQueryHandle initializes the expensive query handler.
  1213  func (do *Petri) InitExpensiveQueryHandle() {
  1214  	do.expensiveQueryHandle = expensivequery.NewExpensiveQueryHandle(do.exit)
  1215  }
  1216  
  1217  const privilegeKey = "/milevadb/privilege"
  1218  
  1219  // NotifyUFIDelatePrivilege uFIDelates the privilege key in etcd; MilevaDB clients that watch
  1220  // the key will get a notification.
  1221  func (do *Petri) NotifyUFIDelatePrivilege(ctx stochastikctx.Context) {
  1222  	if do.etcdClient != nil {
  1223  		event := do.etcdClient.KV
  1224  		_, err := event.Put(context.Background(), privilegeKey, "")
  1225  		if err != nil {
  1226  			logutil.BgLogger().Warn("notify uFIDelate privilege failed", zap.Error(err))
  1227  		}
  1228  	}
  1229  	// uFIDelate locally
  1230  	_, _, err := ctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate).InterDircRestrictedALLEGROSQL(`FLUSH PRIVILEGES`)
  1231  	if err != nil {
  1232  		logutil.BgLogger().Error("unable to uFIDelate privileges", zap.Error(err))
  1233  	}
  1234  }
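        // Illustrative flow (added note): after a statement that changes privilege data, the caller
        // is expected to invoke NotifyUFIDelatePrivilege so that every server watching privilegeKey
        // (see LoadPrivilegeLoop above) reloads its privileges:
        //
        //	do.NotifyUFIDelatePrivilege(ctx)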
  1235  
  1236  var (
  1237  	// ErrSchemaReplicantExpired is the error indicating that the information schemaReplicant is out of date.
  1238  	ErrSchemaReplicantExpired = terror.ClassPetri.New(errno.ErrSchemaReplicantExpired, errno.MyALLEGROSQLErrName[errno.ErrSchemaReplicantExpired])
  1239  	// ErrSchemaReplicantChanged is the error indicating that the information schemaReplicant has changed.
  1240  	ErrSchemaReplicantChanged = terror.ClassPetri.New(errno.ErrSchemaReplicantChanged,
  1241  		errno.MyALLEGROSQLErrName[errno.ErrSchemaReplicantChanged]+". "+ekv.TxnRetryableMark)
  1242  )