github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/interlock/inspection_result_test.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package interlock_test
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"strings"
    20  
    21  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    22  	. "github.com/whtcorpsinc/check"
    23  	"github.com/whtcorpsinc/failpoint"
    24  	"github.com/whtcorpsinc/milevadb/schemareplicant"
    25  	"github.com/whtcorpsinc/milevadb/soliton/testkit"
    26  	"github.com/whtcorpsinc/milevadb/stochastik"
    27  	"github.com/whtcorpsinc/milevadb/stochastikctx/variable"
    28  	"github.com/whtcorpsinc/milevadb/types"
    29  )
    30  
// Register the suite with gocheck. SerialSuites runs these tests
// sequentially because they enable/disable shared global failpoints.
var _ = SerialSuites(&inspectionResultSuite{&testClusterBlockBase{}})

// inspectionResultSuite groups the tests for the
// information_schema.inspection_result virtual causet.
type inspectionResultSuite struct{ *testClusterBlockBase }
    34  
// TestInspectionResult checks the 'config' and 'version' inspection rules:
// inconsistent configuration values across instances and divergent git
// hashes per component must be reported, and predicate push-down on rule,
// item and type must filter the result rows.
func (s *inspectionResultSuite) TestInspectionResult(c *C) {
	tk := testkit.NewTestKitWithInit(c, s.causetstore)

	mockData := map[string]variable.BlockSnapshot{}
	// mock configuration inconsistent
	mockData[schemareplicant.BlockClusterConfig] = variable.BlockSnapshot{
		Events: [][]types.Causet{
			types.MakeCausets("milevadb", "192.168.3.22:4000", "dbs.lease", "1"),
			types.MakeCausets("milevadb", "192.168.3.23:4000", "dbs.lease", "2"),
			types.MakeCausets("milevadb", "192.168.3.24:4000", "dbs.lease", "1"),
			types.MakeCausets("milevadb", "192.168.3.25:4000", "dbs.lease", "1"),
			types.MakeCausets("milevadb", "192.168.3.24:4000", "status.status-port", "10080"),
			types.MakeCausets("milevadb", "192.168.3.25:4000", "status.status-port", "10081"),
			types.MakeCausets("milevadb", "192.168.3.24:4000", "log.slow-threshold", "0"),
			types.MakeCausets("milevadb", "192.168.3.25:4000", "log.slow-threshold", "1"),
			types.MakeCausets("einsteindb", "192.168.3.32:26600", "interlock.high", "8"),
			types.MakeCausets("einsteindb", "192.168.3.33:26600", "interlock.high", "8"),
			types.MakeCausets("einsteindb", "192.168.3.34:26600", "interlock.high", "7"),
			types.MakeCausets("einsteindb", "192.168.3.35:26600", "interlock.high", "7"),
			types.MakeCausets("einsteindb", "192.168.3.35:26600", "raftstore.sync-log", "false"),
			types.MakeCausets("fidel", "192.168.3.32:2379", "scheduler.limit", "3"),
			types.MakeCausets("fidel", "192.168.3.33:2379", "scheduler.limit", "3"),
			types.MakeCausets("fidel", "192.168.3.34:2379", "scheduler.limit", "3"),
			types.MakeCausets("fidel", "192.168.3.35:2379", "scheduler.limit", "3"),
			// The following items are expected to be exempt from the
			// consistency check even though their values differ per instance
			// (instance-specific settings such as names and URLs).
			types.MakeCausets("fidel", "192.168.3.34:2379", "advertise-client-urls", "0"),
			types.MakeCausets("fidel", "192.168.3.35:2379", "advertise-client-urls", "1"),
			types.MakeCausets("fidel", "192.168.3.34:2379", "advertise-peer-urls", "0"),
			types.MakeCausets("fidel", "192.168.3.35:2379", "advertise-peer-urls", "1"),
			types.MakeCausets("fidel", "192.168.3.34:2379", "client-urls", "0"),
			types.MakeCausets("fidel", "192.168.3.35:2379", "client-urls", "1"),
			types.MakeCausets("fidel", "192.168.3.34:2379", "log.file.filename", "0"),
			types.MakeCausets("fidel", "192.168.3.35:2379", "log.file.filename", "1"),
			types.MakeCausets("fidel", "192.168.3.34:2379", "metric.job", "0"),
			types.MakeCausets("fidel", "192.168.3.35:2379", "metric.job", "1"),
			types.MakeCausets("fidel", "192.168.3.34:2379", "name", "0"),
			types.MakeCausets("fidel", "192.168.3.35:2379", "name", "1"),
			types.MakeCausets("fidel", "192.168.3.34:2379", "peer-urls", "0"),
			types.MakeCausets("fidel", "192.168.3.35:2379", "peer-urls", "1"),
		},
	}
	// mock version inconsistent
	mockData[schemareplicant.BlockClusterInfo] = variable.BlockSnapshot{
		Events: [][]types.Causet{
			types.MakeCausets("milevadb", "192.168.1.11:1234", "192.168.1.11:1234", "4.0", "a234c"),
			types.MakeCausets("milevadb", "192.168.1.12:1234", "192.168.1.11:1234", "4.0", "a234d"),
			types.MakeCausets("milevadb", "192.168.1.13:1234", "192.168.1.11:1234", "4.0", "a234e"),
			types.MakeCausets("einsteindb", "192.168.1.21:1234", "192.168.1.21:1234", "4.0", "c234d"),
			types.MakeCausets("einsteindb", "192.168.1.22:1234", "192.168.1.22:1234", "4.0", "c234d"),
			types.MakeCausets("einsteindb", "192.168.1.23:1234", "192.168.1.23:1234", "4.0", "c234e"),
			types.MakeCausets("fidel", "192.168.1.31:1234", "192.168.1.31:1234", "4.0", "m234c"),
			types.MakeCausets("fidel", "192.168.1.32:1234", "192.168.1.32:1234", "4.0", "m234d"),
			types.MakeCausets("fidel", "192.168.1.33:1234", "192.168.1.33:1234", "4.0", "m234e"),
		},
	}
	mockData[schemareplicant.BlockClusterHardware] = variable.BlockSnapshot{
		Events: [][]types.Causet{
			types.MakeCausets("einsteindb", "192.168.1.22:1234", "disk", "sda", "used-percent", "80"),
			types.MakeCausets("einsteindb", "192.168.1.23:1234", "disk", "sdb", "used-percent", "50"),
			types.MakeCausets("fidel", "192.168.1.31:1234", "cpu", "cpu", "cpu-logical-embeddeds", "1"),
			types.MakeCausets("fidel", "192.168.1.32:1234", "cpu", "cpu", "cpu-logical-embeddeds", "4"),
			types.MakeCausets("fidel", "192.168.1.33:1234", "cpu", "cpu", "cpu-logical-embeddeds", "10"),
		},
	}

	// datetime parses str as a datetime via the suite helper, failing the
	// test on parse errors.
	datetime := func(str string) types.Time {
		return s.parseTime(c, tk.Se, str)
	}
	// construct some mock abnormal data
	mockMetric := map[string][][]types.Causet{
		"node_total_memory": {
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "192.168.3.33:26600", 50.0*1024*1024*1024),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "192.168.3.34:26600", 50.0*1024*1024*1024),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "192.168.3.35:26600", 50.0*1024*1024*1024),
		},
	}

	ctx := s.setupForInspection(c, mockMetric, mockData)
	defer s.tearDownForInspection(c)

	// Each case pairs a query against inspection_result with the exact rows
	// it must produce; the filters exercise predicate push-down on rule,
	// item and type.
	cases := []struct {
		allegrosql string
		rows       []string
	}{
		{
			allegrosql: "select rule, item, type, value, reference, severity, details from information_schema.inspection_result where rule in ('config', 'version')",
			rows: []string{
				"config interlock.high einsteindb inconsistent consistent warning 192.168.3.32:26600,192.168.3.33:26600 config value is 8\n192.168.3.34:26600,192.168.3.35:26600 config value is 7",
				"config dbs.lease milevadb inconsistent consistent warning 192.168.3.22:4000,192.168.3.24:4000,192.168.3.25:4000 config value is 1\n192.168.3.23:4000 config value is 2",
				"config log.slow-threshold milevadb 0 not 0 warning slow-threshold = 0 will record every query to slow log, it may affect performance",
				"config log.slow-threshold milevadb inconsistent consistent warning 192.168.3.24:4000 config value is 0\n192.168.3.25:4000 config value is 1",
				"config raftstore.sync-log einsteindb false not false warning sync-log should be true to avoid recover region when the machine breaks down",
				"version git_hash fidel inconsistent consistent critical the cluster has 3 different fidel versions, execute the allegrosql to see more detail: select * from information_schema.cluster_info where type='fidel'",
				"version git_hash milevadb inconsistent consistent critical the cluster has 3 different milevadb versions, execute the allegrosql to see more detail: select * from information_schema.cluster_info where type='milevadb'",
				"version git_hash einsteindb inconsistent consistent critical the cluster has 2 different einsteindb versions, execute the allegrosql to see more detail: select * from information_schema.cluster_info where type='einsteindb'",
			},
		},
		{
			allegrosql: "select rule, item, type, value, reference, severity, details from information_schema.inspection_result where rule in ('config', 'version') and item in ('interlock.high', 'git_hash') and type='einsteindb'",
			rows: []string{
				"config interlock.high einsteindb inconsistent consistent warning 192.168.3.32:26600,192.168.3.33:26600 config value is 8\n192.168.3.34:26600,192.168.3.35:26600 config value is 7",
				"version git_hash einsteindb inconsistent consistent critical the cluster has 2 different einsteindb versions, execute the allegrosql to see more detail: select * from information_schema.cluster_info where type='einsteindb'",
			},
		},
		{
			allegrosql: "select rule, item, type, value, reference, severity, details from information_schema.inspection_result where rule='config'",
			rows: []string{
				"config interlock.high einsteindb inconsistent consistent warning 192.168.3.32:26600,192.168.3.33:26600 config value is 8\n192.168.3.34:26600,192.168.3.35:26600 config value is 7",
				"config dbs.lease milevadb inconsistent consistent warning 192.168.3.22:4000,192.168.3.24:4000,192.168.3.25:4000 config value is 1\n192.168.3.23:4000 config value is 2",
				"config log.slow-threshold milevadb 0 not 0 warning slow-threshold = 0 will record every query to slow log, it may affect performance",
				"config log.slow-threshold milevadb inconsistent consistent warning 192.168.3.24:4000 config value is 0\n192.168.3.25:4000 config value is 1",
				"config raftstore.sync-log einsteindb false not false warning sync-log should be true to avoid recover region when the machine breaks down",
			},
		},
		{
			allegrosql: "select rule, item, type, value, reference, severity, details from information_schema.inspection_result where rule='version' and item='git_hash' and type in ('fidel', 'milevadb')",
			rows: []string{
				"version git_hash fidel inconsistent consistent critical the cluster has 3 different fidel versions, execute the allegrosql to see more detail: select * from information_schema.cluster_info where type='fidel'",
				"version git_hash milevadb inconsistent consistent critical the cluster has 3 different milevadb versions, execute the allegrosql to see more detail: select * from information_schema.cluster_info where type='milevadb'",
			},
		},
	}

	for _, cs := range cases {
		rs, err := tk.Se.InterDircute(ctx, cs.allegrosql)
		c.Assert(err, IsNil)
		result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("ALLEGROALLEGROSQL: %v", cs.allegrosql))
		// The inspection queries themselves must not emit warnings.
		warnings := tk.Se.GetStochastikVars().StmtCtx.GetWarnings()
		c.Assert(len(warnings), Equals, 0, Commentf("expected no warning, got: %+v", warnings))
		result.Check(testkit.Events(cs.rows...))
	}
}
   166  
   167  func (s *inspectionResultSuite) parseTime(c *C, se stochastik.Stochastik, str string) types.Time {
   168  	t, err := types.ParseTime(se.GetStochastikVars().StmtCtx, str, allegrosql.TypeDatetime, types.MaxFsp)
   169  	c.Assert(err, IsNil)
   170  	return t
   171  }
   172  
   173  func (s *inspectionResultSuite) tearDownForInspection(c *C) {
   174  	fpName := "github.com/whtcorpsinc/milevadb/interlock/mockMergeMockInspectionBlocks"
   175  	c.Assert(failpoint.Disable(fpName), IsNil)
   176  
   177  	fpName2 := "github.com/whtcorpsinc/milevadb/interlock/mockMetricsBlockData"
   178  	c.Assert(failpoint.Disable(fpName2), IsNil)
   179  }
   180  
   181  func (s *inspectionResultSuite) setupForInspection(c *C, mockData map[string][][]types.Causet, configurations map[string]variable.BlockSnapshot) context.Context {
   182  	// mock einsteindb configuration.
   183  	if configurations == nil {
   184  		configurations = map[string]variable.BlockSnapshot{}
   185  		configurations[schemareplicant.BlockClusterConfig] = variable.BlockSnapshot{
   186  			Events: [][]types.Causet{
   187  				types.MakeCausets("einsteindb", "einsteindb-0", "raftstore.apply-pool-size", "2"),
   188  				types.MakeCausets("einsteindb", "einsteindb-0", "raftstore.causetstore-pool-size", "2"),
   189  				types.MakeCausets("einsteindb", "einsteindb-0", "readpool.interlock.high-concurrency", "4"),
   190  				types.MakeCausets("einsteindb", "einsteindb-0", "readpool.interlock.low-concurrency", "4"),
   191  				types.MakeCausets("einsteindb", "einsteindb-0", "readpool.interlock.normal-concurrency", "4"),
   192  				types.MakeCausets("einsteindb", "einsteindb-1", "readpool.interlock.normal-concurrency", "8"),
   193  				types.MakeCausets("einsteindb", "einsteindb-0", "readpool.storage.high-concurrency", "4"),
   194  				types.MakeCausets("einsteindb", "einsteindb-0", "readpool.storage.low-concurrency", "4"),
   195  				types.MakeCausets("einsteindb", "einsteindb-0", "readpool.storage.normal-concurrency", "4"),
   196  				types.MakeCausets("einsteindb", "einsteindb-0", "server.grpc-concurrency", "8"),
   197  				types.MakeCausets("einsteindb", "einsteindb-0", "storage.scheduler-worker-pool-size", "6"),
   198  			},
   199  		}
   200  		// mock cluster information
   201  		configurations[schemareplicant.BlockClusterInfo] = variable.BlockSnapshot{
   202  			Events: [][]types.Causet{
   203  				types.MakeCausets("fidel", "fidel-0", "fidel-0", "4.0", "a234c", "", ""),
   204  				types.MakeCausets("milevadb", "milevadb-0", "milevadb-0s", "4.0", "a234c", "", ""),
   205  				types.MakeCausets("milevadb", "milevadb-1", "milevadb-1s", "4.0", "a234c", "", ""),
   206  				types.MakeCausets("einsteindb", "einsteindb-0", "einsteindb-0s", "4.0", "a234c", "", ""),
   207  				types.MakeCausets("einsteindb", "einsteindb-1", "einsteindb-1s", "4.0", "a234c", "", ""),
   208  				types.MakeCausets("einsteindb", "einsteindb-2", "einsteindb-2s", "4.0", "a234c", "", ""),
   209  			},
   210  		}
   211  	}
   212  	fpName := "github.com/whtcorpsinc/milevadb/interlock/mockMergeMockInspectionBlocks"
   213  	c.Assert(failpoint.Enable(fpName, "return"), IsNil)
   214  
   215  	// Mock for metric causet data.
   216  	fpName2 := "github.com/whtcorpsinc/milevadb/interlock/mockMetricsBlockData"
   217  	c.Assert(failpoint.Enable(fpName2, "return"), IsNil)
   218  
   219  	ctx := context.WithValue(context.Background(), "__mockInspectionBlocks", configurations)
   220  	ctx = context.WithValue(ctx, "__mockMetricsBlockData", mockData)
   221  	ctx = failpoint.WithHook(ctx, func(_ context.Context, currName string) bool {
   222  		return fpName2 == currName || currName == fpName
   223  	})
   224  	return ctx
   225  }
   226  
// TestThresholdCheckInspection checks the 'threshold-check' rule against the
// einsteindb_thread_cpu metric: per-thread CPU usage is aggregated by thread
// name prefix (cop_*, grpc_*, raftstore_*, apply_*, store_read_*, sched_*,
// split_check) and compared against thresholds derived from the mocked pool
// sizes, first with abnormal data and then with normal data.
func (s *inspectionResultSuite) TestThresholdCheckInspection(c *C) {
	tk := testkit.NewTestKitWithInit(c, s.causetstore)
	// datetime parses str as a datetime via the suite helper, failing the
	// test on parse errors.
	datetime := func(str string) types.Time {
		return s.parseTime(c, tk.Se, str)
	}
	// construct some mock abnormal data
	mockData := map[string][][]types.Causet{
		// defCausumns: time, instance, name, value
		"einsteindb_thread_cpu": {
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "cop_normal0", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "cop_normal1", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-1s", "cop_normal0", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "cop_high1", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "cop_high2", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:21:00"), "einsteindb-0s", "cop_high1", 5.0),
			types.MakeCausets(datetime("2020-02-14 05:22:00"), "einsteindb-0s", "cop_high1", 1.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "cop_low1", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "grpc_1", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "raftstore_1", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "apply_0", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "store_read_norm1", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "store_read_high2", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "store_read_low0", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "sched_2", 10.0),
			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "split_check", 10.0),
		},
		// The remaining metrics are empty so only the CPU checks fire.
		"FIDel_tso_wait_duration":                   {},
		"milevadb_get_token_duration":               {},
		"milevadb_load_schema_duration":             {},
		"einsteindb_scheduler_command_duration":     {},
		"einsteindb_handle_snapshot_duration":       {},
		"einsteindb_storage_async_request_duration": {},
		"einsteindb_engine_write_duration":          {},
		"einsteindb_engine_max_get_duration":        {},
		"einsteindb_engine_max_seek_duration":       {},
		"einsteindb_scheduler_pending_commands":     {},
		"einsteindb_block_index_cache_hit":          {},
		"einsteindb_block_data_cache_hit":           {},
		"einsteindb_block_filter_cache_hit":         {},
		"FIDel_scheduler_store_status":              {},
		"FIDel_region_health":                       {},
	}

	ctx := s.setupForInspection(c, mockData, nil)
	defer s.tearDownForInspection(c)

	rs, err := tk.Se.InterDircute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, type, instance,status_address, value, reference, details from information_schema.inspection_result where rule='threshold-check' order by item")
	c.Assert(err, IsNil)
	result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect ALLEGROALLEGROSQL failed"))
	c.Assert(tk.Se.GetStochastikVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetStochastikVars().StmtCtx.GetWarnings()))
	result.Check(testkit.Events(
		"apply-cpu einsteindb einsteindb-0 einsteindb-0s 10.00 < 1.60, config: raftstore.apply-pool-size=2 the 'apply-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"interlock-high-cpu einsteindb einsteindb-0 einsteindb-0s 20.00 < 3.60, config: readpool.interlock.high-concurrency=4 the 'interlock-high-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"interlock-low-cpu einsteindb einsteindb-0 einsteindb-0s 10.00 < 3.60, config: readpool.interlock.low-concurrency=4 the 'interlock-low-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"interlock-normal-cpu einsteindb einsteindb-0 einsteindb-0s 20.00 < 3.60, config: readpool.interlock.normal-concurrency=4 the 'interlock-normal-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"interlock-normal-cpu einsteindb einsteindb-1 einsteindb-1s 10.00 < 7.20, config: readpool.interlock.normal-concurrency=8 the 'interlock-normal-cpu' max cpu-usage of einsteindb-1s einsteindb is too high",
		"grpc-cpu einsteindb einsteindb-0 einsteindb-0s 10.00 < 7.20, config: server.grpc-concurrency=8 the 'grpc-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"raftstore-cpu einsteindb einsteindb-0 einsteindb-0s 10.00 < 1.60, config: raftstore.causetstore-pool-size=2 the 'raftstore-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"scheduler-worker-cpu einsteindb einsteindb-0 einsteindb-0s 10.00 < 5.10, config: storage.scheduler-worker-pool-size=6 the 'scheduler-worker-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"split-check-cpu einsteindb einsteindb-0 einsteindb-0s 10.00 < 0.00 the 'split-check-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"storage-readpool-high-cpu einsteindb einsteindb-0 einsteindb-0s 10.00 < 3.60, config: readpool.storage.high-concurrency=4 the 'storage-readpool-high-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"storage-readpool-low-cpu einsteindb einsteindb-0 einsteindb-0s 10.00 < 3.60, config: readpool.storage.low-concurrency=4 the 'storage-readpool-low-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
		"storage-readpool-normal-cpu einsteindb einsteindb-0 einsteindb-0s 10.00 < 3.60, config: readpool.storage.normal-concurrency=4 the 'storage-readpool-normal-cpu' max cpu-usage of einsteindb-0s einsteindb is too high",
	))

	// construct some mock normal data
	mockData["einsteindb_thread_cpu"] = [][]types.Causet{
		// defCausumns: time, instance, name, value
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "cop_normal0", 1.0),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "cop_high1", 0.1),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "cop_low1", 1.0),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "grpc_1", 7.21),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "grpc_2", 0.21),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "raftstore_1", 1.0),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "apply_0", 1.0),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "store_read_norm1", 1.0),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "store_read_high2", 1.0),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "store_read_low0", 1.0),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "sched_2", 0.3),
		types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "split_check", 0.5),
	}

	// Re-run with the normal data; only the aggregated grpc CPU (7.42)
	// still exceeds its threshold (7.20).
	ctx = context.WithValue(ctx, "__mockMetricsBlockData", mockData)
	rs, err = tk.Se.InterDircute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, type, instance,status_address, value, reference from information_schema.inspection_result where rule='threshold-check' order by item")
	c.Assert(err, IsNil)
	result = tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect ALLEGROALLEGROSQL failed"))
	c.Assert(tk.Se.GetStochastikVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetStochastikVars().StmtCtx.GetWarnings()))
	result.Check(testkit.Events("grpc-cpu einsteindb einsteindb-0 einsteindb-0s 7.42 < 7.20, config: server.grpc-concurrency=8"))
}
   316  
   317  func (s *inspectionResultSuite) TestThresholdCheckInspection2(c *C) {
   318  	tk := testkit.NewTestKitWithInit(c, s.causetstore)
   319  	datetime := func(s string) types.Time {
   320  		t, err := types.ParseTime(tk.Se.GetStochastikVars().StmtCtx, s, allegrosql.TypeDatetime, types.MaxFsp)
   321  		c.Assert(err, IsNil)
   322  		return t
   323  	}
   324  
   325  	// construct some mock abnormal data
   326  	mockData := map[string][][]types.Causet{
   327  		"FIDel_tso_wait_duration": {
   328  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", 0.999, 0.06),
   329  		},
   330  		"milevadb_get_token_duration": {
   331  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "milevadb-0s", 0.999, 0.02*10e5),
   332  		},
   333  		"milevadb_load_schema_duration": {
   334  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "milevadb-0s", 0.99, 2.0),
   335  		},
   336  		"einsteindb_scheduler_command_duration": {
   337  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "get", 0.99, 2.0),
   338  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "write", 0.99, 5.0),
   339  		},
   340  		"einsteindb_handle_snapshot_duration": {
   341  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "gen", 0.999, 40.0),
   342  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "read", 0.999, 10.0),
   343  		},
   344  		"einsteindb_storage_async_request_duration": {
   345  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "write", 0.999, 0.2),
   346  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "snapshot", 0.999, 0.06),
   347  		},
   348  		"einsteindb_engine_write_duration": {
   349  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "write_max", "ekv", 0.2*10e5),
   350  		},
   351  		"einsteindb_engine_max_get_duration": {
   352  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "get_max", "ekv", 0.06*10e5),
   353  		},
   354  		"einsteindb_engine_max_seek_duration": {
   355  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "seek_max", "raft", 0.06*10e5),
   356  		},
   357  		"einsteindb_scheduler_pending_commands": {
   358  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", 1001.0),
   359  		},
   360  		"einsteindb_block_index_cache_hit": {
   361  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "ekv", 0.94),
   362  		},
   363  		"einsteindb_block_data_cache_hit": {
   364  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "ekv", 0.79),
   365  		},
   366  		"einsteindb_block_filter_cache_hit": {
   367  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "einsteindb-0s", "ekv", 0.93),
   368  		},
   369  		"einsteindb_thread_cpu":        {},
   370  		"FIDel_scheduler_store_status": {},
   371  		"FIDel_region_health":          {},
   372  	}
   373  
   374  	ctx := s.setupForInspection(c, mockData, nil)
   375  	defer s.tearDownForInspection(c)
   376  
   377  	rs, err := tk.Se.InterDircute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, type, instance, status_address, value, reference, details from information_schema.inspection_result where rule='threshold-check' order by item")
   378  	c.Assert(err, IsNil)
   379  	result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect ALLEGROALLEGROSQL failed"))
   380  	c.Assert(tk.Se.GetStochastikVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetStochastikVars().StmtCtx.GetWarnings()))
   381  	result.Check(testkit.Events(
   382  		"data-causet-cache-hit einsteindb einsteindb-0 einsteindb-0s 0.790 > 0.800 min data-causet-cache-hit rate of einsteindb-0s einsteindb is too low",
   383  		"filter-causet-cache-hit einsteindb einsteindb-0 einsteindb-0s 0.930 > 0.950 min filter-causet-cache-hit rate of einsteindb-0s einsteindb is too low",
   384  		"get-token-duration milevadb milevadb-0 milevadb-0s 0.020 < 0.001 max duration of milevadb-0s milevadb get-token-duration is too slow",
   385  		"handle-snapshot-duration einsteindb einsteindb-0 einsteindb-0s 40.000 < 30.000 max duration of einsteindb-0s einsteindb handle-snapshot-duration is too slow",
   386  		"index-causet-cache-hit einsteindb einsteindb-0 einsteindb-0s 0.940 > 0.950 min index-causet-cache-hit rate of einsteindb-0s einsteindb is too low",
   387  		"load-schemaReplicant-duration milevadb milevadb-0 milevadb-0s 2.000 < 1.000 max duration of milevadb-0s milevadb load-schemaReplicant-duration is too slow",
   388  		"lmdb-get-duration einsteindb einsteindb-0 einsteindb-0s 0.060 < 0.050 max duration of einsteindb-0s einsteindb lmdb-get-duration is too slow",
   389  		"lmdb-seek-duration einsteindb einsteindb-0 einsteindb-0s 0.060 < 0.050 max duration of einsteindb-0s einsteindb lmdb-seek-duration is too slow",
   390  		"lmdb-write-duration einsteindb einsteindb-0 einsteindb-0s 0.200 < 0.100 max duration of einsteindb-0s einsteindb lmdb-write-duration is too slow",
   391  		"scheduler-cmd-duration einsteindb einsteindb-0 einsteindb-0s 5.000 < 0.100 max duration of einsteindb-0s einsteindb scheduler-cmd-duration is too slow",
   392  		"scheduler-pending-cmd-count einsteindb einsteindb-0 einsteindb-0s 1001.000 < 1000.000  einsteindb-0s einsteindb scheduler has too many pending commands",
   393  		"storage-snapshot-duration einsteindb einsteindb-0 einsteindb-0s 0.060 < 0.050 max duration of einsteindb-0s einsteindb storage-snapshot-duration is too slow",
   394  		"storage-write-duration einsteindb einsteindb-0 einsteindb-0s 0.200 < 0.100 max duration of einsteindb-0s einsteindb storage-write-duration is too slow",
   395  		"tso-duration milevadb fidel-0 fidel-0 0.060 < 0.050 max duration of fidel-0 milevadb tso-duration is too slow",
   396  	))
   397  }
   398  
   399  func (s *inspectionResultSuite) TestThresholdCheckInspection3(c *C) {
   400  	tk := testkit.NewTestKitWithInit(c, s.causetstore)
   401  	datetime := func(s string) types.Time {
   402  		t, err := types.ParseTime(tk.Se.GetStochastikVars().StmtCtx, s, allegrosql.TypeDatetime, types.MaxFsp)
   403  		c.Assert(err, IsNil)
   404  		return t
   405  	}
   406  
   407  	// construct some mock abnormal data
   408  	mockData := map[string][][]types.Causet{
   409  		"FIDel_scheduler_store_status": {
   410  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-0", "0", "leader_sembedded", 100.0),
   411  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-1", "1", "leader_sembedded", 50.0),
   412  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "fidel-0", "einsteindb-0", "0", "leader_sembedded", 99.0),
   413  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "fidel-0", "einsteindb-1", "1", "leader_sembedded", 51.0),
   414  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-0", "0", "region_sembedded", 100.0),
   415  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-1", "1", "region_sembedded", 90.0),
   416  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-0", "0", "store_available", 100.0),
   417  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-1", "1", "store_available", 70.0),
   418  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-0", "0", "region_count", 20001.0),
   419  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-0", "0", "leader_count", 10000.0),
   420  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "fidel-0", "einsteindb-0", "0", "leader_count", 5000.0),
   421  			types.MakeCausets(datetime("2020-02-14 05:22:00"), "fidel-0", "einsteindb-0", "0", "leader_count", 5000.0),
   422  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-1", "0", "leader_count", 5000.0),
   423  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "fidel-0", "einsteindb-1", "0", "leader_count", 10000.0),
   424  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "einsteindb-2", "0", "leader_count", 10000.0),
   425  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "fidel-0", "einsteindb-2", "0", "leader_count", 0.0),
   426  		},
   427  		"FIDel_region_health": {
   428  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "extra-peer-region-count", 40.0),
   429  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "learner-peer-region-count", 40.0),
   430  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "fidel-0", "pending-peer-region-count", 30.0),
   431  		},
   432  	}
   433  
   434  	ctx := s.setupForInspection(c, mockData, nil)
   435  	defer s.tearDownForInspection(c)
   436  
   437  	rs, err := tk.Se.InterDircute(ctx, `select /*+ time_range('2020-02-14 04:20:00','2020-02-14 05:23:00') */
   438  		item, type, instance,status_address, value, reference, details from information_schema.inspection_result
   439  		where rule='threshold-check' and item in ('leader-sembedded-balance','region-sembedded-balance','region-count','region-health','causetstore-available-balance','leader-drop')
   440  		order by item`)
   441  	c.Assert(err, IsNil)
   442  	result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect ALLEGROALLEGROSQL failed"))
   443  	c.Assert(tk.Se.GetStochastikVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetStochastikVars().StmtCtx.GetWarnings()))
   444  	result.Check(testkit.Events(
   445  		"leader-drop einsteindb einsteindb-2 einsteindb-2s 10000 <= 50 einsteindb-2 einsteindb has too many leader-drop around time 2020-02-14 05:21:00.000000, leader count from 10000 drop to 0",
   446  		"leader-drop einsteindb einsteindb-0 einsteindb-0s 5000 <= 50 einsteindb-0 einsteindb has too many leader-drop around time 2020-02-14 05:21:00.000000, leader count from 10000 drop to 5000",
   447  		"leader-sembedded-balance einsteindb einsteindb-1 einsteindb-1s 50.00% < 5.00% einsteindb-0 max leader_sembedded is 100.00, much more than einsteindb-1 min leader_sembedded 50.00",
   448  		"region-count einsteindb einsteindb-0 einsteindb-0s 20001.00 <= 20000 einsteindb-0 einsteindb has too many regions",
   449  		"region-health fidel fidel-0 fidel-0 110.00 < 100 the count of extra-perr and learner-peer and pending-peer are 110, it means the scheduling is too frequent or too slow",
   450  		"region-sembedded-balance einsteindb einsteindb-1 einsteindb-1s 10.00% < 5.00% einsteindb-0 max region_sembedded is 100.00, much more than einsteindb-1 min region_sembedded 90.00",
   451  		"causetstore-available-balance einsteindb einsteindb-1 einsteindb-1s 30.00% < 20.00% einsteindb-0 max store_available is 100.00, much more than einsteindb-1 min store_available 70.00"))
   452  }
   453  
// TestCriticalErrorInspection verifies the 'critical-error' inspection rule:
// mock error-counter metrics (critical errors, panics, binlog errors,
// scheduler/coprocessor busy, channel full, write stall, and prometheus 'up')
// are installed, and the test checks that inspection_result reports one row
// per abnormal counter over the queried time range.
func (s *inspectionResultSuite) TestCriticalErrorInspection(c *C) {
	tk := testkit.NewTestKitWithInit(c, s.causetstore)

	// Stand up mock cluster gRPC servers; stop them all when the test ends.
	testServers := s.setupClusterGRPCServer(c)
	defer func() {
		for _, s := range testServers {
			s.server.Stop()
		}
	}()

	// Encode each mock server as "type,addr,addr" and inject the list via
	// the mockClusterLogServerInfo failpoint so cluster topology lookups
	// resolve to the servers started above.
	var servers []string
	for _, s := range testServers {
		servers = append(servers, strings.Join([]string{s.typ, s.address, s.address}, ","))
	}
	fpName2 := "github.com/whtcorpsinc/milevadb/interlock/mockClusterLogServerInfo"
	fpExpr := strings.Join(servers, ";")
	c.Assert(failpoint.Enable(fpName2, fmt.Sprintf(`return("%s")`, fpExpr)), IsNil)
	defer func() { c.Assert(failpoint.Disable(fpName2), IsNil) }()

	// Parse a datetime literal using the suite's shared helper.
	datetime := func(str string) types.Time {
		return s.parseTime(c, tk.Se, str)
	}

	// construct some mock data
	mockData := map[string][][]types.Causet{
		// defCausumns: time, instance, type, value
		"einsteindb_critical_error_total_count": {
			types.MakeCausets(datetime("2020-02-12 10:35:00"), "einsteindb-0s", "type1", 0.0),
			types.MakeCausets(datetime("2020-02-12 10:36:00"), "einsteindb-1s", "type1", 1.0),
			types.MakeCausets(datetime("2020-02-12 10:37:00"), "einsteindb-2s", "type2", 5.0),
		},
		// defCausumns: time, instance, value
		"milevadb_panic_count_total_count": {
			types.MakeCausets(datetime("2020-02-12 10:35:00"), "milevadb-0s", 4.0),
			types.MakeCausets(datetime("2020-02-12 10:36:00"), "milevadb-0s", 0.0),
			types.MakeCausets(datetime("2020-02-12 10:37:00"), "milevadb-1s", 1.0),
		},
		// defCausumns: time, instance, value
		"milevadb_binlog_error_total_count": {
			types.MakeCausets(datetime("2020-02-12 10:35:00"), "milevadb-1s", 4.0),
			types.MakeCausets(datetime("2020-02-12 10:36:00"), "milevadb-2s", 0.0),
			types.MakeCausets(datetime("2020-02-12 10:37:00"), "milevadb-3s", 1.0),
		},
		// defCausumns: time, instance, EDB, type, stage, value
		"einsteindb_scheduler_is_busy_total_count": {
			types.MakeCausets(datetime("2020-02-12 10:35:00"), "einsteindb-0s", "db1", "type1", "stage1", 1.0),
			types.MakeCausets(datetime("2020-02-12 10:36:00"), "einsteindb-0s", "db2", "type1", "stage2", 2.0),
			types.MakeCausets(datetime("2020-02-12 10:37:00"), "einsteindb-1s", "db1", "type2", "stage1", 3.0),
			types.MakeCausets(datetime("2020-02-12 10:38:00"), "einsteindb-0s", "db1", "type1", "stage2", 4.0),
			types.MakeCausets(datetime("2020-02-12 10:39:00"), "einsteindb-0s", "db2", "type1", "stage1", 5.0),
			types.MakeCausets(datetime("2020-02-12 10:40:00"), "einsteindb-1s", "db1", "type2", "stage2", 6.0),
		},
		// defCausumns: time, instance, EDB, value
		"einsteindb_coprocessor_is_busy_total_count": {
			types.MakeCausets(datetime("2020-02-12 10:35:00"), "einsteindb-0s", "db1", 1.0),
			types.MakeCausets(datetime("2020-02-12 10:36:00"), "einsteindb-0s", "db2", 2.0),
			types.MakeCausets(datetime("2020-02-12 10:37:00"), "einsteindb-1s", "db1", 3.0),
			types.MakeCausets(datetime("2020-02-12 10:38:00"), "einsteindb-0s", "db1", 4.0),
			types.MakeCausets(datetime("2020-02-12 10:39:00"), "einsteindb-0s", "db2", 5.0),
			types.MakeCausets(datetime("2020-02-12 10:40:00"), "einsteindb-1s", "db1", 6.0),
		},
		// defCausumns: time, instance, EDB, type, value
		"einsteindb_channel_full_total_count": {
			types.MakeCausets(datetime("2020-02-12 10:35:00"), "einsteindb-0s", "db1", "type1", 1.0),
			types.MakeCausets(datetime("2020-02-12 10:36:00"), "einsteindb-0s", "db2", "type1", 2.0),
			types.MakeCausets(datetime("2020-02-12 10:37:00"), "einsteindb-1s", "db1", "type2", 3.0),
			types.MakeCausets(datetime("2020-02-12 10:38:00"), "einsteindb-0s", "db1", "type1", 4.0),
			types.MakeCausets(datetime("2020-02-12 10:39:00"), "einsteindb-0s", "db2", "type1", 5.0),
			types.MakeCausets(datetime("2020-02-12 10:40:00"), "einsteindb-1s", "db1", "type2", 6.0),
		},
		// defCausumns: time, instance, EDB, value
		"einsteindb_engine_write_stall": {
			types.MakeCausets(datetime("2020-02-12 10:35:00"), "einsteindb-0s", "ekv", 1.0),
			types.MakeCausets(datetime("2020-02-12 10:36:00"), "einsteindb-0s", "raft", 2.0),
			types.MakeCausets(datetime("2020-02-12 10:37:00"), "einsteindb-1s", "reason3", 3.0),
		},
		// defCausumns: time, instance, job, value
		// 'up' going to 0 marks the instance as disconnected from prometheus.
		"up": {
			types.MakeCausets(datetime("2020-02-12 10:35:00"), "einsteindb-0s", "einsteindb", 1.0),
			types.MakeCausets(datetime("2020-02-12 10:36:00"), "einsteindb-0s", "einsteindb", 0.0),
			types.MakeCausets(datetime("2020-02-12 10:37:00"), "milevadb-0s", "milevadb", 0.0),
			types.MakeCausets(datetime("2020-02-12 10:37:00"), "milevadb-1s", "milevadb", 0.0),
			types.MakeCausets(datetime("2020-02-12 10:38:00"), "milevadb-1s", "milevadb", 1.0),
		},
	}

	// Install the mock metrics and tear the inspection fixture down at exit.
	ctx := s.setupForInspection(c, mockData, nil)
	defer s.tearDownForInspection(c)

	rs, err := tk.Se.InterDircute(ctx, "select /*+ time_range('2020-02-12 10:35:00','2020-02-12 10:37:00') */ item, instance,status_address, value, details from information_schema.inspection_result where rule='critical-error'")
	c.Assert(err, IsNil)
	result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect ALLEGROALLEGROSQL failed"))
	// The inspection query itself must not produce warnings.
	c.Assert(tk.Se.GetStochastikVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetStochastikVars().StmtCtx.GetWarnings()))
	// Expected rows: server-down events first, then the remaining items
	// roughly in descending order of their accumulated counter value.
	result.Check(testkit.Events(
		"server-down einsteindb-0 einsteindb-0s  einsteindb einsteindb-0s disconnect with prometheus around time '2020-02-12 10:36:00.000000'",
		"server-down milevadb-1 milevadb-1s  milevadb milevadb-1s disconnect with prometheus around time '2020-02-12 10:37:00.000000'",
		"channel-is-full einsteindb-1 einsteindb-1s 9.00(db1, type2) the total number of errors about 'channel-is-full' is too many",
		"interlock-is-busy einsteindb-1 einsteindb-1s 9.00(db1) the total number of errors about 'interlock-is-busy' is too many",
		"channel-is-full einsteindb-0 einsteindb-0s 7.00(db2, type1) the total number of errors about 'channel-is-full' is too many",
		"interlock-is-busy einsteindb-0 einsteindb-0s 7.00(db2) the total number of errors about 'interlock-is-busy' is too many",
		"scheduler-is-busy einsteindb-1 einsteindb-1s 6.00(db1, type2, stage2) the total number of errors about 'scheduler-is-busy' is too many",
		"channel-is-full einsteindb-0 einsteindb-0s 5.00(db1, type1) the total number of errors about 'channel-is-full' is too many",
		"interlock-is-busy einsteindb-0 einsteindb-0s 5.00(db1) the total number of errors about 'interlock-is-busy' is too many",
		"critical-error einsteindb-2 einsteindb-2s 5.00(type2) the total number of errors about 'critical-error' is too many",
		"scheduler-is-busy einsteindb-0 einsteindb-0s 5.00(db2, type1, stage1) the total number of errors about 'scheduler-is-busy' is too many",
		"binlog-error milevadb-1 milevadb-1s 4.00 the total number of errors about 'binlog-error' is too many",
		"panic-count milevadb-0 milevadb-0s 4.00 the total number of errors about 'panic-count' is too many",
		"scheduler-is-busy einsteindb-0 einsteindb-0s 4.00(db1, type1, stage2) the total number of errors about 'scheduler-is-busy' is too many",
		"scheduler-is-busy einsteindb-1 einsteindb-1s 3.00(db1, type2, stage1) the total number of errors about 'scheduler-is-busy' is too many",
		"einsteindb_engine_write_stall einsteindb-1 einsteindb-1s 3.00(reason3) the total number of errors about 'einsteindb_engine_write_stall' is too many",
		"scheduler-is-busy einsteindb-0 einsteindb-0s 2.00(db2, type1, stage2) the total number of errors about 'scheduler-is-busy' is too many",
		"einsteindb_engine_write_stall einsteindb-0 einsteindb-0s 2.00(raft) the total number of errors about 'einsteindb_engine_write_stall' is too many",
		"binlog-error  milevadb-3s 1.00 the total number of errors about 'binlog-error' is too many",
		"critical-error einsteindb-1 einsteindb-1s 1.00(type1) the total number of errors about 'critical-error' is too many",
		"panic-count milevadb-1 milevadb-1s 1.00 the total number of errors about 'panic-count' is too many",
		"scheduler-is-busy einsteindb-0 einsteindb-0s 1.00(db1, type1, stage1) the total number of errors about 'scheduler-is-busy' is too many",
		"einsteindb_engine_write_stall einsteindb-0 einsteindb-0s 1.00(ekv) the total number of errors about 'einsteindb_engine_write_stall' is too many",
	))
}
   573  
   574  func (s *inspectionResultSuite) TestNodeLoadInspection(c *C) {
   575  	tk := testkit.NewTestKitWithInit(c, s.causetstore)
   576  	datetime := func(s string) types.Time {
   577  		t, err := types.ParseTime(tk.Se.GetStochastikVars().StmtCtx, s, allegrosql.TypeDatetime, types.MaxFsp)
   578  		c.Assert(err, IsNil)
   579  		return t
   580  	}
   581  
   582  	// construct some mock abnormal data
   583  	mockData := map[string][][]types.Causet{
   584  		// defCausumns: time, instance, value
   585  		"node_load1": {
   586  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "node-0", 28.1),
   587  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "node-1", 13.0),
   588  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-0", 10.0),
   589  		},
   590  		// defCausumns: time, instance, value
   591  		"node_load5": {
   592  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "node-0", 27.9),
   593  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "node-1", 14.1),
   594  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-0", 0.0),
   595  		},
   596  		// defCausumns: time, instance, value
   597  		"node_load15": {
   598  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "node-0", 30.0),
   599  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "node-1", 14.1),
   600  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-0", 20.0),
   601  		},
   602  		// defCausumns: time, instance, value
   603  		"node_virtual_cpus": {
   604  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-0", 40.0),
   605  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-1", 20.0),
   606  		},
   607  		// defCausumns: time, instance, value
   608  		"node_memory_usage": {
   609  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-0", 80.0),
   610  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-1", 60.0),
   611  			types.MakeCausets(datetime("2020-02-14 05:22:00"), "node-0", 60.0),
   612  		},
   613  		// defCausumns: time, instance, value
   614  		"node_memory_swap_used": {
   615  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-0", 0.0),
   616  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-1", 1.0),
   617  			types.MakeCausets(datetime("2020-02-14 05:22:00"), "node-1", 0.0),
   618  		},
   619  		// defCausumns: time, instance, device, value
   620  		"node_disk_usage": {
   621  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-0", "/dev/nvme0", 80.0),
   622  			types.MakeCausets(datetime("2020-02-14 05:22:00"), "node-0", "/dev/nvme0", 50.0),
   623  			types.MakeCausets(datetime("2020-02-14 05:21:00"), "node-0", "tmpfs", 80.0),
   624  			types.MakeCausets(datetime("2020-02-14 05:22:00"), "node-0", "tmpfs", 50.0),
   625  		},
   626  	}
   627  
   628  	ctx := s.setupForInspection(c, mockData, nil)
   629  	defer s.tearDownForInspection(c)
   630  
   631  	rs, err := tk.Se.InterDircute(ctx, `select /*+ time_range('2020-02-14 04:20:00','2020-02-14 05:23:00') */
   632  		item, type, instance, value, reference, details from information_schema.inspection_result
   633  		where rule='node-load' order by item, value`)
   634  	c.Assert(err, IsNil)
   635  	result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect ALLEGROALLEGROSQL failed"))
   636  	c.Assert(tk.Se.GetStochastikVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetStochastikVars().StmtCtx.GetWarnings()))
   637  	result.Check(testkit.Events(
   638  		"cpu-load1 node node-0 28.1 < 28.0 cpu-load1 should less than (cpu_logical_embeddeds * 0.7)",
   639  		"cpu-load15 node node-1 14.1 < 14.0 cpu-load15 should less than (cpu_logical_embeddeds * 0.7)",
   640  		"cpu-load15 node node-0 30.0 < 28.0 cpu-load15 should less than (cpu_logical_embeddeds * 0.7)",
   641  		"cpu-load5 node node-1 14.1 < 14.0 cpu-load5 should less than (cpu_logical_embeddeds * 0.7)",
   642  		"disk-usage node node-0 80.0% < 70% the disk-usage of /dev/nvme0 is too high",
   643  		"swap-memory-used node node-1 1.0 0 ",
   644  		"virtual-memory-usage node node-0 80.0% < 70% the memory-usage is too high",
   645  	))
   646  }
   647  
   648  func (s *inspectionResultSuite) TestConfigCheckOfStorageBlockCacheSize(c *C) {
   649  	tk := testkit.NewTestKitWithInit(c, s.causetstore)
   650  	datetime := func(s string) types.Time {
   651  		t, err := types.ParseTime(tk.Se.GetStochastikVars().StmtCtx, s, allegrosql.TypeDatetime, types.MaxFsp)
   652  		c.Assert(err, IsNil)
   653  		return t
   654  	}
   655  
   656  	configurations := map[string]variable.BlockSnapshot{}
   657  	configurations[schemareplicant.BlockClusterConfig] = variable.BlockSnapshot{
   658  		Events: [][]types.Causet{
   659  			types.MakeCausets("einsteindb", "192.168.3.33:26600", "storage.causet-cache.capacity", "10GiB"),
   660  			types.MakeCausets("einsteindb", "192.168.3.33:26700", "storage.causet-cache.capacity", "20GiB"),
   661  			types.MakeCausets("einsteindb", "192.168.3.34:26600", "storage.causet-cache.capacity", "1TiB"),
   662  			types.MakeCausets("einsteindb", "192.168.3.35:26700", "storage.causet-cache.capacity", "20GiB"),
   663  		},
   664  	}
   665  
   666  	// construct some mock abnormal data
   667  	mockData := map[string][][]types.Causet{
   668  		"node_total_memory": {
   669  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "192.168.3.33:26600", 50.0*1024*1024*1024),
   670  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "192.168.3.34:26600", 50.0*1024*1024*1024),
   671  			types.MakeCausets(datetime("2020-02-14 05:20:00"), "192.168.3.35:26600", 50.0*1024*1024*1024),
   672  		},
   673  	}
   674  
   675  	ctx := s.setupForInspection(c, mockData, configurations)
   676  	defer s.tearDownForInspection(c)
   677  
   678  	rs, err := tk.Se.InterDircute(ctx, "select  /*+ time_range('2020-02-14 04:20:00','2020-02-14 05:23:00') */ * from information_schema.inspection_result where rule='config' and item='storage.causet-cache.capacity' order by value")
   679  	c.Assert(err, IsNil)
   680  	result := tk.ResultSetToResultWithCtx(ctx, rs[0], Commentf("execute inspect ALLEGROALLEGROSQL failed"))
   681  	c.Assert(tk.Se.GetStochastikVars().StmtCtx.WarningCount(), Equals, uint16(0), Commentf("unexpected warnings: %+v", tk.Se.GetStochastikVars().StmtCtx.GetWarnings()))
   682  	result.Check(testkit.Events(
   683  		"config storage.causet-cache.capacity einsteindb 192.168.3.34  1099511627776 < 24159191040 warning There are 1 EinsteinDB server in 192.168.3.34 node, the total 'storage.causet-cache.capacity' of EinsteinDB is more than (0.45 * total node memory)",
   684  		"config storage.causet-cache.capacity einsteindb 192.168.3.33  32212254720 < 24159191040 warning There are 2 EinsteinDB server in 192.168.3.33 node, the total 'storage.causet-cache.capacity' of EinsteinDB is more than (0.45 * total node memory)",
   685  	))
   686  }