github.com/matrixorigin/matrixone@v1.2.0/pkg/util/metric/v2/dashboard/grafana_dashboard_task.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dashboard
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  
    21  	"github.com/K-Phoen/grabana/axis"
    22  	"github.com/K-Phoen/grabana/dashboard"
    23  	"github.com/K-Phoen/grabana/row"
    24  	"github.com/K-Phoen/grabana/timeseries"
    25  	tsaxis "github.com/K-Phoen/grabana/timeseries/axis"
    26  )
    27  
    28  func (c *DashboardCreator) initTaskDashboard() error {
    29  	folder, err := c.createFolder(moFolderName)
    30  	if err != nil {
    31  		return err
    32  	}
    33  
    34  	build, err := dashboard.New(
    35  		"Task Metrics",
    36  		c.withRowOptions(
    37  			c.initTaskFlushTableTailRow(),
    38  			c.initTaskMergeRow(),
    39  			c.initTaskMergeTransferPageRow(),
    40  			c.initCommitTimeRow(),
    41  			c.initTaskCheckpointRow(),
    42  			c.initTaskSelectivityRow(),
    43  			c.initTaskStorageUsageRow(),
    44  		)...)
    45  
    46  	if err != nil {
    47  		return err
    48  	}
    49  	_, err = c.cli.UpsertDashboard(context.Background(), folder, build)
    50  	return err
    51  }
    52  
    53  func (c *DashboardCreator) initTaskMergeTransferPageRow() dashboard.Option {
    54  	return dashboard.Row(
    55  		"Task Merge Transfer Page Size",
    56  		c.withGraph(
    57  			"Transfer Page Length",
    58  			12,
    59  			`sum(`+c.getMetricWithFilter("mo_task_merge_transfer_page_size", ``)+`)`+`* 28 * 1.3`,
    60  			"{{ "+c.by+" }}", axis.Unit("decbytes")),
    61  	)
    62  }
    63  
    64  func (c *DashboardCreator) initCommitTimeRow() dashboard.Option {
    65  	return dashboard.Row(
    66  		"Commit Time",
    67  		c.getPercentHist(
    68  			"Flush Commit Time",
    69  			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="commit_table_tail"`),
    70  			[]float64{0.50, 0.8, 0.90, 0.99},
    71  			SpanNulls(true),
    72  			timeseries.Span(6),
    73  		),
    74  		c.getPercentHist(
    75  			"Merge Commit Time",
    76  			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="commit_merge_objects"`),
    77  			[]float64{0.50, 0.8, 0.90, 0.99},
    78  			SpanNulls(true),
    79  			timeseries.Span(6),
    80  		),
    81  	)
    82  }
    83  
    84  func (c *DashboardCreator) initTaskFlushTableTailRow() dashboard.Option {
    85  	return dashboard.Row(
    86  		"Flush Table Tail",
    87  		c.getTimeSeries(
    88  			"Flush table tail Count",
    89  			[]string{fmt.Sprintf(
    90  				"sum by (%s) (increase(%s[$interval]))",
    91  				c.by,
    92  				c.getMetricWithFilter(`mo_task_short_duration_seconds_count`, `type="flush_table_tail"`),
    93  			)},
    94  			[]string{"Count"},
    95  			timeseries.Span(3),
    96  		),
    97  		c.getPercentHist(
    98  			"Flush table tail Duration",
    99  			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="flush_table_tail"`),
   100  			[]float64{0.50, 0.8, 0.90, 0.99},
   101  			SpanNulls(true),
   102  			timeseries.Span(3),
   103  		),
   104  		c.getPercentHist(
   105  			"Flush table deletes count",
   106  			c.getMetricWithFilter(`mo_task_hist_total_bucket`, `type="flush_deletes_count"`),
   107  			[]float64{0.5, 0.7, 0.8, 0.9},
   108  			timeseries.Axis(tsaxis.Unit("")),
   109  			timeseries.Span(3),
   110  			SpanNulls(true),
   111  		),
   112  
   113  		c.getPercentHist(
   114  			"Flush table deletes file size",
   115  			c.getMetricWithFilter(`mo_task_hist_bytes_bucket`, `type="flush_deletes_size"`),
   116  			[]float64{0.5, 0.7, 0.8, 0.9},
   117  			timeseries.Axis(tsaxis.Unit("decbytes")),
   118  			timeseries.Span(3),
   119  			SpanNulls(true),
   120  		),
   121  	)
   122  }
   123  
   124  func (c *DashboardCreator) initTaskMergeRow() dashboard.Option {
   125  	return dashboard.Row(
   126  		"Merge",
   127  		c.getTimeSeries(
   128  			"Merge Count",
   129  			[]string{
   130  				fmt.Sprintf(
   131  					"sum by (%s) (increase(%s[$interval]))",
   132  					c.by,
   133  					c.getMetricWithFilter(`mo_task_scheduled_by_total`, `type="merge"`)),
   134  
   135  				fmt.Sprintf(
   136  					"sum by (%s) (increase(%s[$interval]))",
   137  					c.by,
   138  					c.getMetricWithFilter(`mo_task_scheduled_by_total`, `type="merge",nodetype="cn"`)),
   139  			},
   140  			[]string{
   141  				"Schedule Count",
   142  				"CN Schedule Count",
   143  			},
   144  		),
   145  		c.getTimeSeries(
   146  			"Merge Batch Size",
   147  			[]string{fmt.Sprintf(
   148  				"sum by (%s) (increase(%s[$interval]))",
   149  				c.by,
   150  				c.getMetricWithFilter(`mo_task_execute_results_total`, `type="merged_size"`))},
   151  			[]string{"Size"},
   152  			timeseries.Axis(tsaxis.Unit("decbytes")),
   153  		),
   154  	)
   155  }
   156  
   157  func (c *DashboardCreator) initTaskCheckpointRow() dashboard.Option {
   158  	return dashboard.Row(
   159  		"Checkpoint",
   160  		c.getPercentHist(
   161  			"Checkpoint Entry Pending",
   162  			c.getMetricWithFilter(`mo_task_long_duration_seconds_bucket`, `type="ckp_entry_pending"`),
   163  			[]float64{0.50, 0.8, 0.90, 0.99},
   164  			SpanNulls(true),
   165  			timeseries.Span(12),
   166  		),
   167  	)
   168  }
   169  
   170  func (c *DashboardCreator) initTaskStorageUsageRow() dashboard.Option {
   171  	rows := c.getMultiHistogram(
   172  		[]string{
   173  			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="gckp_collect_usage"`),
   174  			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="ickp_collect_usage"`),
   175  			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="handle_usage_request"`),
   176  			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="show_accounts_get_table_stats"`),
   177  			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="show_accounts_get_storage_usage"`),
   178  			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="show_accounts_total_duration"`),
   179  		},
   180  		[]string{
   181  			"gckp_collect_usage",
   182  			"ickp_collect_usage",
   183  			"handle_usage_request",
   184  			"show_accounts_get_table_stats",
   185  			"show_accounts_get_storage_usage",
   186  			"show_accounts_total_duration",
   187  		},
   188  		[]float64{0.50, 0.8, 0.90, 0.99},
   189  		[]float32{3, 3, 3, 3},
   190  		axis.Unit("s"),
   191  		axis.Min(0))
   192  
   193  	rows = append(rows, c.withGraph(
   194  		"tn storage usage cache mem used",
   195  		12,
   196  		`sum(`+c.getMetricWithFilter("mo_task_storage_usage_cache_size", ``)+`)`,
   197  		"cache mem used",
   198  		axis.Unit("mb")))
   199  
   200  	return dashboard.Row(
   201  		"Storage Usage Overview",
   202  		rows...,
   203  	)
   204  
   205  }
   206  
   207  func (c *DashboardCreator) initTaskSelectivityRow() dashboard.Option {
   208  
   209  	hitRateFunc := func(title, metricType string) row.Option {
   210  		return c.getTimeSeries(
   211  			title,
   212  			[]string{
   213  				fmt.Sprintf(
   214  					"sum(%s) by (%s) / on(%s) sum(%s) by (%s)",
   215  					c.getMetricWithFilter(`mo_task_selectivity`, `type="`+metricType+`_hit"`), c.by, c.by,
   216  					c.getMetricWithFilter(`mo_task_selectivity`, `type="`+metricType+`_total"`), c.by),
   217  			},
   218  			[]string{fmt.Sprintf("filterout-{{ %s }}", c.by)},
   219  			timeseries.Span(4),
   220  		)
   221  	}
   222  	counterRateFunc := func(title, metricType string) row.Option {
   223  		return c.getTimeSeries(
   224  			title,
   225  			[]string{
   226  				fmt.Sprintf(
   227  					"sum(rate(%s[$interval])) by (%s)",
   228  					c.getMetricWithFilter(`mo_task_selectivity`, `type="`+metricType+`_total"`), c.by),
   229  			},
   230  			[]string{fmt.Sprintf("req-{{ %s }}", c.by)},
   231  			timeseries.Span(4),
   232  		)
   233  	}
   234  	return dashboard.Row(
   235  		"Read Selectivity",
   236  		hitRateFunc("Read filter rate", "readfilter"),
   237  		hitRateFunc("Block range filter rate", "block"),
   238  		hitRateFunc("Column update filter rate", "column"),
   239  		counterRateFunc("Read filter request", "readfilter"),
   240  		counterRateFunc("Block range request", "block"),
   241  		counterRateFunc("Column update request", "column"),
   242  		c.getPercentHist(
   243  			"Iterate deletes rows count per block",
   244  			c.getMetricWithFilter(`mo_task_hist_total_bucket`, `type="load_mem_deletes_per_block"`),
   245  			[]float64{0.5, 0.7, 0.8, 0.9},
   246  			timeseries.Axis(tsaxis.Unit("")),
   247  			timeseries.Span(4),
   248  			SpanNulls(true),
   249  		),
   250  	)
   251  }