github.com/matrixorigin/matrixone@v1.2.0/pkg/util/metric/v2/dashboard/grafana_dashboard_logtail.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dashboard
    16  
    17  import (
    18  	"context"
    19  
    20  	"github.com/K-Phoen/grabana/axis"
    21  	"github.com/K-Phoen/grabana/dashboard"
    22  )
    23  
    24  func (c *DashboardCreator) initLogTailDashboard() error {
    25  	folder, err := c.createFolder(moFolderName)
    26  	if err != nil {
    27  		return err
    28  	}
    29  
    30  	build, err := dashboard.New(
    31  		"Logtail Metrics",
    32  		c.withRowOptions(
    33  			c.initLogtailOverviewRow(),
    34  			c.initLogtailQueueRow(),
    35  			c.initLogtailBytesRow(),
    36  			c.initLogtailLoadCheckpointRow(),
    37  			c.initLogtailCollectRow(),
    38  			c.initLogtailSubscriptionRow(),
    39  			c.initLogtailUpdatePartitionRow(),
    40  		)...)
    41  	if err != nil {
    42  		return err
    43  	}
    44  	_, err = c.cli.UpsertDashboard(context.Background(), folder, build)
    45  	return err
    46  }
    47  
    48  func (c *DashboardCreator) initLogtailCollectRow() dashboard.Option {
    49  	return dashboard.Row(
    50  		"Logtail collect duration",
    51  		c.getHistogram(
    52  			"collect duration",
    53  			c.getMetricWithFilter("mo_logtail_collect_duration_seconds_bucket", ``),
    54  			[]float64{0.50, 0.8, 0.90, 0.99},
    55  			12,
    56  			axis.Unit("s"),
    57  			axis.Min(0)),
    58  	)
    59  }
    60  
    61  func (c *DashboardCreator) initLogtailSubscriptionRow() dashboard.Option {
    62  	return dashboard.Row(
    63  		"logtail subscription the tn have received",
    64  		c.withGraph(
    65  			"logtail subscription average increase",
    66  			6,
    67  			`sum(increase(`+c.getMetricWithFilter("mo_logtail_subscription_request_total", "")+`[$interval])) by (`+c.by+`)`,
    68  			"{{ "+c.by+" }}"),
    69  		c.withGraph(
    70  			"logtail subscription average increase, sensitive",
    71  			6,
    72  			`sum(increase(`+c.getMetricWithFilter("mo_logtail_subscription_request_total", "")+`[$interval])) by (`+c.by+`)`,
    73  			"{{ "+c.by+" }}"),
    74  	)
    75  }
    76  
    77  func (c *DashboardCreator) initLogtailQueueRow() dashboard.Option {
    78  	return dashboard.Row(
    79  		"Logtail Queue Status",
    80  		c.withMultiGraph(
    81  			"CN Received status",
    82  			4,
    83  			[]string{
    84  				`sum(rate(` + c.getMetricWithFilter("mo_logtail_received_total", `type="total"`) + `[$interval])) by (` + c.by + `)`,
    85  				`sum(rate(` + c.getMetricWithFilter("mo_logtail_received_total", `type="subscribe"`) + `[$interval])) by (` + c.by + `)`,
    86  				`sum(rate(` + c.getMetricWithFilter("mo_logtail_received_total", `type="unsubscribe"`) + `[$interval])) by (` + c.by + `)`,
    87  				`sum(rate(` + c.getMetricWithFilter("mo_logtail_received_total", `type="update"`) + `[$interval])) by (` + c.by + `)`,
    88  				`sum(rate(` + c.getMetricWithFilter("mo_logtail_received_total", `type="heartbeat"`) + `[$interval])) by (` + c.by + `)`,
    89  			},
    90  			[]string{
    91  				"{{ " + c.by + " }}: total",
    92  				"{{ " + c.by + " }}: subscribe",
    93  				"{{ " + c.by + " }}: unsubscribe",
    94  				"{{ " + c.by + " }}: update",
    95  				"{{ " + c.by + " }}: heartbeat",
    96  			}),
    97  
    98  		c.withMultiGraph(
    99  			"Queue status",
   100  			4,
   101  			[]string{
   102  				`sum(` + c.getMetricWithFilter("mo_logtail_queue_size", `type="send"`) + `)`,
   103  				`sum(` + c.getMetricWithFilter("mo_logtail_queue_size", `type="receive"`) + `)`,
   104  				`sum(` + c.getMetricWithFilter("mo_logtail_queue_size", `type="apply"`) + `)`,
   105  			},
   106  			[]string{
   107  				"send",
   108  				"receive",
   109  				"apply",
   110  			}),
   111  		c.withGraph(
   112  			"Checkpoint logtail",
   113  			4,
   114  			`sum(rate(`+c.getMetricWithFilter("mo_logtail_load_checkpoint_total", "")+`[$interval])) by (`+c.by+`)`,
   115  			"{{ "+c.by+" }}"),
   116  	)
   117  }
   118  
   119  func (c *DashboardCreator) initLogtailBytesRow() dashboard.Option {
   120  	return dashboard.Row(
   121  		"Logtail size",
   122  		c.getHistogram(
   123  			"Logtail size",
   124  			c.getMetricWithFilter(`mo_logtail_bytes_bucket`, ``),
   125  			[]float64{0.50, 0.8, 0.90, 0.99},
   126  			12,
   127  			axis.Unit("bytes"),
   128  			axis.Min(0)),
   129  	)
   130  }
   131  
   132  func (c *DashboardCreator) initLogtailOverviewRow() dashboard.Option {
   133  	return dashboard.Row(
   134  		"Logtail overview",
   135  		c.getMultiHistogram(
   136  			[]string{
   137  				c.getMetricWithFilter(`mo_logtail_append_duration_seconds_bucket`, ``),
   138  				c.getMetricWithFilter(`mo_logtail_send_duration_seconds_bucket`, `step="total"`),
   139  				c.getMetricWithFilter(`mo_logtail_send_duration_seconds_bucket`, `step="latency"`),
   140  				c.getMetricWithFilter(`mo_logtail_send_duration_seconds_bucket`, `step="network"`),
   141  				c.getMetricWithFilter(`mo_logtail_apply_duration_seconds_bucket`, `step="apply"`),
   142  				c.getMetricWithFilter(`mo_logtail_apply_duration_seconds_bucket`, `step="apply-latency"`),
   143  				c.getMetricWithFilter(`mo_logtail_apply_duration_seconds_bucket`, `step="apply-notify"`),
   144  				c.getMetricWithFilter(`mo_logtail_apply_duration_seconds_bucket`, `step="apply-notify-latency"`),
   145  				c.getMetricWithFilter(`mo_txn_commit_duration_seconds_bucket`, `type="cn-wait-logtail"`),
   146  			},
   147  			[]string{
   148  				"append",
   149  				"send",
   150  				"send-latency",
   151  				"send-network",
   152  				"apply",
   153  				"apply-latency",
   154  				"apply-notify",
   155  				"apply-notify-latency",
   156  				"wait-commit-apply",
   157  			},
   158  			[]float64{0.50, 0.8, 0.90, 0.99},
   159  			[]float32{3, 3, 3, 3},
   160  			axis.Unit("s"),
   161  			axis.Min(0))...,
   162  	)
   163  }
   164  
   165  func (c *DashboardCreator) initLogtailUpdatePartitionRow() dashboard.Option {
   166  	return dashboard.Row(
   167  		"Logtail update partition",
   168  		c.getMultiHistogram(
   169  			[]string{
   170  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="enqueue-global-stats"`),
   171  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="get-partition"`),
   172  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="get-lock"`),
   173  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="get-catalog"`),
   174  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="handle-checkpoint"`),
   175  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="consume"`),
   176  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="consume-catalog-table"`),
   177  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="consume-catalog-table"`),
   178  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="consume-one-entry"`),
   179  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="consume-one-entry-logtailreplay"`),
   180  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="consume-one-entry-catalog-cache"`),
   181  				c.getMetricWithFilter(`mo_logtail_update_partition_duration_seconds_bucket`, `step="update-timestamps"`),
   182  			},
   183  			[]string{
   184  				"enqueue-global-stats",
   185  				"get-partition",
   186  				"get-lock",
   187  				"get-catalog",
   188  				"handle-checkpoint",
   189  				"consume",
   190  				"consume-catalog-table",
   191  				"consume-catalog-table",
   192  				"consume-one-entry",
   193  				"consume-one-entry-logtailreplay",
   194  				"consume-one-entry-catalog-cache",
   195  				"update-timestamps",
   196  			},
   197  			[]float64{0.50, 0.8, 0.90, 0.99},
   198  			[]float32{3, 3, 3, 3},
   199  			axis.Unit("s"),
   200  			axis.Min(0))...,
   201  	)
   202  }
   203  
   204  func (c *DashboardCreator) initLogtailLoadCheckpointRow() dashboard.Option {
   205  	return dashboard.Row(
   206  		"Logtail load checkpoint",
   207  		c.getHistogram(
   208  			"Logtail load checkpoint",
   209  			c.getMetricWithFilter(`mo_logtail_load_checkpoint_duration_seconds_bucket`, ``),
   210  			[]float64{0.50, 0.8, 0.90, 0.99},
   211  			12,
   212  			axis.Unit("s"),
   213  			axis.Min(0)),
   214  	)
   215  }