github.com/matrixorigin/matrixone@v1.2.0/pkg/util/metric/v2/dashboard/grafana_dashboard_task.go

// Copyright 2023 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dashboard

import (
	"context"
	"fmt"

	"github.com/K-Phoen/grabana/axis"
	"github.com/K-Phoen/grabana/dashboard"
	"github.com/K-Phoen/grabana/row"
	"github.com/K-Phoen/grabana/timeseries"
	tsaxis "github.com/K-Phoen/grabana/timeseries/axis"
)

// initTaskDashboard builds the "Task Metrics" dashboard from the task-related
// rows below and upserts it into the MatrixOne Grafana folder.
func (c *DashboardCreator) initTaskDashboard() error {
	folder, err := c.createFolder(moFolderName)
	if err != nil {
		return err
	}

	build, err := dashboard.New(
		"Task Metrics",
		c.withRowOptions(
			c.initTaskFlushTableTailRow(),
			c.initTaskMergeRow(),
			c.initTaskMergeTransferPageRow(),
			c.initCommitTimeRow(),
			c.initTaskCheckpointRow(),
			c.initTaskSelectivityRow(),
			c.initTaskStorageUsageRow(),
		)...)

	if err != nil {
		return err
	}
	_, err = c.cli.UpsertDashboard(context.Background(), folder, build)
	return err
}

// initTaskMergeTransferPageRow charts the estimated memory held by merge
// transfer pages.
func (c *DashboardCreator) initTaskMergeTransferPageRow() dashboard.Option {
	return dashboard.Row(
		"Task Merge Transfer Page Size",
		c.withGraph(
			"Transfer Page Length",
			12,
			`sum(`+c.getMetricWithFilter("mo_task_merge_transfer_page_size", ``)+`)`+`* 28 * 1.3`,
			"{{ "+c.by+" }}", axis.Unit("decbytes")),
	)
}

// initCommitTimeRow charts commit-latency percentiles for flush and merge tasks.
func (c *DashboardCreator) initCommitTimeRow() dashboard.Option {
	return dashboard.Row(
		"Commit Time",
		c.getPercentHist(
			"Flush Commit Time",
			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="commit_table_tail"`),
			[]float64{0.50, 0.8, 0.90, 0.99},
			SpanNulls(true),
			timeseries.Span(6),
		),
		c.getPercentHist(
			"Merge Commit Time",
			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="commit_merge_objects"`),
			[]float64{0.50, 0.8, 0.90, 0.99},
			SpanNulls(true),
			timeseries.Span(6),
		),
	)
}

// initTaskFlushTableTailRow charts flush-table-tail throughput and duration,
// plus the count and size of deletes flushed.
func (c *DashboardCreator) initTaskFlushTableTailRow() dashboard.Option {
	return dashboard.Row(
		"Flush Table Tail",
		c.getTimeSeries(
			"Flush table tail Count",
			[]string{fmt.Sprintf(
				"sum by (%s) (increase(%s[$interval]))",
				c.by,
				c.getMetricWithFilter(`mo_task_short_duration_seconds_count`, `type="flush_table_tail"`),
			)},
			[]string{"Count"},
			timeseries.Span(3),
		),
		c.getPercentHist(
			"Flush table tail Duration",
			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="flush_table_tail"`),
			[]float64{0.50, 0.8, 0.90, 0.99},
			SpanNulls(true),
			timeseries.Span(3),
		),
		c.getPercentHist(
			"Flush table deletes count",
			c.getMetricWithFilter(`mo_task_hist_total_bucket`, `type="flush_deletes_count"`),
			[]float64{0.5, 0.7, 0.8, 0.9},
			timeseries.Axis(tsaxis.Unit("")),
			timeseries.Span(3),
			SpanNulls(true),
		),

		c.getPercentHist(
			"Flush table deletes file size",
			c.getMetricWithFilter(`mo_task_hist_bytes_bucket`, `type="flush_deletes_size"`),
			[]float64{0.5, 0.7, 0.8, 0.9},
			timeseries.Axis(tsaxis.Unit("decbytes")),
			timeseries.Span(3),
			SpanNulls(true),
		),
	)
}

// initTaskMergeRow charts merge scheduling counts and merged batch sizes.
func (c *DashboardCreator) initTaskMergeRow() dashboard.Option {
	return dashboard.Row(
		"Merge",
		c.getTimeSeries(
			"Merge Count",
			[]string{
				fmt.Sprintf(
					"sum by (%s) (increase(%s[$interval]))",
					c.by,
					c.getMetricWithFilter(`mo_task_scheduled_by_total`, `type="merge"`)),

				fmt.Sprintf(
					"sum by (%s) (increase(%s[$interval]))",
					c.by,
					c.getMetricWithFilter(`mo_task_scheduled_by_total`, `type="merge",nodetype="cn"`)),
			},
			[]string{
				"Schedule Count",
				"CN Schedule Count",
			},
		),
		c.getTimeSeries(
			"Merge Batch Size",
			[]string{fmt.Sprintf(
				"sum by (%s) (increase(%s[$interval]))",
				c.by,
				c.getMetricWithFilter(`mo_task_execute_results_total`, `type="merged_size"`))},
			[]string{"Size"},
			timeseries.Axis(tsaxis.Unit("decbytes")),
		),
	)
}

// initTaskCheckpointRow charts how long checkpoint entries stay pending.
func (c *DashboardCreator) initTaskCheckpointRow() dashboard.Option {
	return dashboard.Row(
		"Checkpoint",
		c.getPercentHist(
			"Checkpoint Entry Pending",
			c.getMetricWithFilter(`mo_task_long_duration_seconds_bucket`, `type="ckp_entry_pending"`),
			[]float64{0.50, 0.8, 0.90, 0.99},
			SpanNulls(true),
			timeseries.Span(12),
		),
	)
}

// initTaskStorageUsageRow charts the latency of storage-usage collection tasks
// and the memory used by the TN storage usage cache.
func (c *DashboardCreator) initTaskStorageUsageRow() dashboard.Option {
	rows := c.getMultiHistogram(
		[]string{
			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="gckp_collect_usage"`),
			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="ickp_collect_usage"`),
			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="handle_usage_request"`),
			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="show_accounts_get_table_stats"`),
			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="show_accounts_get_storage_usage"`),
			c.getMetricWithFilter(`mo_task_short_duration_seconds_bucket`, `type="show_accounts_total_duration"`),
		},
		[]string{
			"gckp_collect_usage",
			"ickp_collect_usage",
			"handle_usage_request",
			"show_accounts_get_table_stats",
			"show_accounts_get_storage_usage",
			"show_accounts_total_duration",
		},
		[]float64{0.50, 0.8, 0.90, 0.99},
		[]float32{3, 3, 3, 3},
		axis.Unit("s"),
		axis.Min(0))

	rows = append(rows, c.withGraph(
		"tn storage usage cache mem used",
		12,
		`sum(`+c.getMetricWithFilter("mo_task_storage_usage_cache_size", ``)+`)`,
		"cache mem used",
		axis.Unit("mb")))

	return dashboard.Row(
		"Storage Usage Overview",
		rows...,
	)
}

// initTaskSelectivityRow charts read-selectivity hit rates, request rates, and
// per-block delete counts.
func (c *DashboardCreator) initTaskSelectivityRow() dashboard.Option {
	// hitRateFunc plots the fraction of requests filtered out for the given
	// selectivity metric type (hit / total).
	hitRateFunc := func(title, metricType string) row.Option {
		return c.getTimeSeries(
			title,
			[]string{
				fmt.Sprintf(
					"sum(%s) by (%s) / on(%s) sum(%s) by (%s)",
					c.getMetricWithFilter(`mo_task_selectivity`, `type="`+metricType+`_hit"`), c.by, c.by,
					c.getMetricWithFilter(`mo_task_selectivity`, `type="`+metricType+`_total"`), c.by),
			},
			[]string{fmt.Sprintf("filterout-{{ %s }}", c.by)},
			timeseries.Span(4),
		)
	}
	// counterRateFunc plots the request rate for the given selectivity metric type.
	counterRateFunc := func(title, metricType string) row.Option {
		return c.getTimeSeries(
			title,
			[]string{
				fmt.Sprintf(
					"sum(rate(%s[$interval])) by (%s)",
					c.getMetricWithFilter(`mo_task_selectivity`, `type="`+metricType+`_total"`), c.by),
			},
			[]string{fmt.Sprintf("req-{{ %s }}", c.by)},
			timeseries.Span(4),
		)
	}
	return dashboard.Row(
		"Read Selectivity",
		hitRateFunc("Read filter rate", "readfilter"),
		hitRateFunc("Block range filter rate", "block"),
		hitRateFunc("Column update filter rate", "column"),
		counterRateFunc("Read filter request", "readfilter"),
		counterRateFunc("Block range request", "block"),
		counterRateFunc("Column update request", "column"),
		c.getPercentHist(
			"Iterate deletes rows count per block",
			c.getMetricWithFilter(`mo_task_hist_total_bucket`, `type="load_mem_deletes_per_block"`),
			[]float64{0.5, 0.7, 0.8, 0.9},
			timeseries.Axis(tsaxis.Unit("")),
			timeseries.Span(4),
			SpanNulls(true),
		),
	)
}
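
// Note on the generated queries: getMetricWithFilter and c.by are helpers and
// fields defined elsewhere in this package (the former combines a metric name
// with the dashboard's label filters, the latter is the grouping label), so the
// exact PromQL depends on how the DashboardCreator is configured. Purely as an
// illustration, assuming c.by renders to "pod" and no extra filters are applied,
// the read-filter hit-rate panel built by initTaskSelectivityRow would issue
// roughly:
//
//	sum(mo_task_selectivity{type="readfilter_hit"}) by (pod)
//	  / on(pod)
//	sum(mo_task_selectivity{type="readfilter_total"}) by (pod)
//
// i.e. the per-pod ratio of filtered-out reads to total reads, matching the
// "filterout-{{ pod }}" legend.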