// monitoring/lib/dashboard.libsonnet
//
// Grafana dashboard for the Pyroscope server, built with grafonnet.
//
// The object defined here reads two values from `$._config`, which must be
// supplied by whoever mixes this file in:
//   - `benchmark`: when true, a 'Benchmark' row is added before all other rows,
//     the dashboard auto-refreshes every 5s and the 'Go' row starts expanded.
//   - `selector`: label matchers interpolated into the build-info query of the
//     'Meta' row.
local grafana = import 'grafonnet/grafana.libsonnet';

local prometheus = grafana.prometheus;
local graphPanel = grafana.graphPanel;

// Datasource template variable every panel queries through
// (declared below via grafana.template.datasource).
local datasource = '$PROMETHEUS_DS';

// Restricts a query to the instance picked in the 'instance' template variable.
local instanceMatcher = 'instance="$instance"';

// Matchers shared by the HTTP panels: the selected instance, minus the
// /metrics and /healthz handlers.
local httpMatchers = instanceMatcher + ', handler!="/metrics", handler!="/healthz"';

// Prometheus target whose legend is the metric name.
local namedTarget(expr) = prometheus.target(expr, legendFormat='{{ __name__ }}');

// namedTarget for a bare metric, filtered to the selected instance.
local instanceMetric(metric) = namedTarget('%s{%s}' % [metric, instanceMatcher]);

// Adds every target in `targets` to `panel`, in order.
local withTargets(panel, targets) =
  std.foldl(function(acc, t) acc.addTarget(t), targets, panel);

// Graph panel with the legend rendered as a right-hand table that shows the
// current value and is sorted by it, descending.
// NOTE(review): 'short' is assumed to match grafonnet's own default `format`
// for graphPanel.new; confirm against the vendored grafonnet version.
local legendTableGraph(title, format='short') = graphPanel.new(
  title,
  datasource=datasource,
  format=format,
  legend_values=true,
  legend_rightSide=true,
  legend_alignAsTable=true,
  legend_current=true,
  legend_sort='current',
  legend_sortDesc=true,
);

{
  dashboard:
    local base = grafana.dashboard.new(
      'Pyroscope Server',
      tags=['pyroscope'],
      time_from='now-1h',
      uid='tsWRL6ReZQkirFirmyvnWX1akHXJeHT8I8emjGJo',
      editable=true,
      refresh=if $._config.benchmark then '5s' else '',
    );

    // Only useful when running benchmarks. These panels are deliberately not
    // filtered by $instance.
    local benchmarkRow =
      grafana.row.new(
        title='Benchmark',
      )
      .addPanel(
        grafana.gaugePanel.new(
          'Run Progress',
          datasource=datasource,
          unit='percentunit',
          reducerFunction='lastNotNull',
          min=0,
          max=1,
        )
        .addThreshold({ color: 'green', value: 0 })
        .addTarget(
          prometheus.target(
            '(pyroscope_benchmark_successful_uploads + pyroscope_benchmark_upload_errors) / pyroscope_benchmark_requests_total',
          )
        )
      )
      .addPanel(
        graphPanel.new(
          'Upload Errors (Total)',
          datasource=datasource,
          span=4,
        )
        .addTarget(namedTarget('pyroscope_benchmark_upload_errors{}'))
      )
      .addPanel(
        graphPanel.new(
          'Successful Uploads (Total)',
          datasource=datasource,
          span=4,
        )
        .addTarget(namedTarget('pyroscope_benchmark_successful_uploads{}'))
      );

    // Conditionally add the benchmark row at the top.
    local board = if $._config.benchmark then base.addRow(benchmarkRow) else base;

    board
    .addTemplate(
      grafana.template.datasource(
        name='PROMETHEUS_DS',
        query='prometheus',
        current='prometheus',
        hide='hidden',  // any value other than '' and 'label' works
      )
    )
    .addTemplate(
      grafana.template.new(
        'instance',
        datasource,
        'label_values(pyroscope_build_info, instance)',
        // Refresh on time range change; otherwise the variable may be left
        // unpopulated, e.g. when prometheus/grafana/pyroscope are started at
        // the same time.
        refresh='time',
        label='instance',
      )
    )

    .addRow(
      grafana.row.new(
        title='Meta',
      )
      .addPanel(
        grafana.tablePanel.new(
          title='',
          datasource=datasource,
          span=12,
          height=10,
        )
        // these columns don't provide any value
        .hideColumn('__name__')
        .hideColumn('Time')
        .hideColumn('instance')
        .hideColumn('Value')
        .hideColumn('job')
        // somewhat useful, but hidden to keep the table clean
        .hideColumn('use_embedded_assets')
        .addTarget(
          prometheus.target(
            'pyroscope_build_info{%s}' % $._config.selector,
            instant=true,
            format='table',
          )
        )
      )
    )

    .addRow(
      grafana.row.new(
        title='General',
      )
      .addPanel(
        graphPanel.new(
          'Request Latency P99',
          datasource=datasource,
          format='seconds',
        )
        .addTarget(
          prometheus.target(
            |||
              histogram_quantile(0.99,
                sum(rate(pyroscope_http_request_duration_seconds_bucket{%s}[$__rate_interval]))
                by (le, handler)
              )
            ||| % httpMatchers,
            legendFormat='{{ handler }}',
          )
        )
      )
      .addPanel(
        graphPanel.new(
          'Error Rate',
          datasource=datasource,
        )
        // share of requests answered with a 5xx code, per handler
        .addTarget(
          prometheus.target(
            |||
              sum(rate(pyroscope_http_request_duration_seconds_count{%(matchers)s, code=~"5.."}[$__rate_interval])) by (handler)
              /
              sum(rate(pyroscope_http_request_duration_seconds_count{%(matchers)s}[$__rate_interval])) by (handler)
            ||| % { matchers: httpMatchers },
            legendFormat='{{ handler }}',
          )
        )
      )
      .addPanel(
        graphPanel.new(
          'Throughput',
          datasource=datasource,
        )
        .addTarget(
          prometheus.target(
            'sum(rate(pyroscope_http_request_duration_seconds_count{%s}[$__rate_interval])) by (handler)' % httpMatchers,
            legendFormat='{{ handler }}',
          )
        )
      )
      .addPanel(
        graphPanel.new(
          'Response Size P99',
          datasource=datasource,
          format='bytes',
        )
        // 0.99 to match the panel title (the query previously asked for 0.95)
        .addTarget(
          prometheus.target(
            'histogram_quantile(0.99, sum(rate(pyroscope_http_response_size_bytes_bucket{%s}[$__rate_interval])) by (le, handler))' % httpMatchers,
            legendFormat='{{ handler }}',
          )
        )
      )
      .addPanel(
        graphPanel.new(
          'CPU Utilization',
          datasource=datasource,
          format='percent',
          min='0',
          // No fixed max: the value is a percentage of one core, so a
          // process running on several cores can exceed 100.
          legend_show=false,
        )
        // process_cpu_seconds_total is a counter; its rate is CPU-seconds per
        // second, scaled by 100 to fit the 'percent' unit.
        .addTarget(
          prometheus.target(
            'rate(process_cpu_seconds_total{%s}[$__rate_interval]) * 100' % instanceMatcher,
          )
        )
      )
    )

    // All storage queries are filtered by the selected instance, like the
    // 'General' and 'Go' rows.
    .addRow(
      grafana.row.new(
        title='Storage',
      )
      .addPanel(
        legendTableGraph('Cache Hit Ratio', format='percentunit')
        .addTarget(
          prometheus.target(
            |||
              (
                rate(pyroscope_storage_db_cache_reads_total{%(m)s}[$__rate_interval])
                -
                rate(pyroscope_storage_db_cache_misses_total{%(m)s}[$__rate_interval])
              )
              /
              rate(pyroscope_storage_db_cache_reads_total{%(m)s}[$__rate_interval])
            ||| % { m: instanceMatcher },
            legendFormat='{{ name }}',
          )
        )
      )
      .addPanel(
        legendTableGraph('Cache disk IO', format='Bps')
        // writes are negated so they are drawn below the x axis, reads above
        .addTarget(
          prometheus.target(
            'rate(pyroscope_storage_db_cache_write_bytes_sum{%s}[$__rate_interval])*-1' % instanceMatcher,
            legendFormat='Writes - {{name}}',
          )
        )
        .addTarget(
          prometheus.target(
            'rate(pyroscope_storage_db_cache_read_bytes_sum{%s}[$__rate_interval])' % instanceMatcher,
            legendFormat='Reads - {{name}}',
          )
        )
      )
      .addPanel(
        graphPanel.new(
          'Storage Reads/Writes',
          datasource=datasource,
        )
        .addTarget(
          prometheus.target(
            'rate(pyroscope_storage_reads_total{%s}[$__rate_interval])' % instanceMatcher,
            legendFormat='Reads',
          )
        )
        .addTarget(
          prometheus.target(
            'rate(pyroscope_storage_writes_total{%s}[$__rate_interval])' % instanceMatcher,
            legendFormat='Writes',
          )
        )
      )
      .addPanel(
        graphPanel.new(
          'Periodic tasks',
          datasource=datasource,
          legend_values=true,
          format='seconds',
          logBase1Y=2,
        )
        .addTarget(
          prometheus.target(
            'pyroscope_storage_eviction_task_duration_seconds{quantile="0.99", %s}' % instanceMatcher,
            legendFormat='evictions',
          )
        )
        .addTarget(
          prometheus.target(
            'pyroscope_storage_writeback_task_duration_seconds{quantile="0.99", %s}' % instanceMatcher,
            legendFormat='write-back',
          )
        )
        .addTarget(
          prometheus.target(
            'pyroscope_storage_retention_task_duration_seconds{quantile="0.99", %s}' % instanceMatcher,
            legendFormat='retention',
          )
        )
      )
      .addPanel(
        legendTableGraph('Disk Usage', format='bytes')
        .addTarget(
          prometheus.target(
            'pyroscope_storage_db_size_bytes{%s}' % instanceMatcher,
            legendFormat='{{ name }}',
          )
        )
        .addTarget(
          prometheus.target(
            'sum without(name)(pyroscope_storage_db_size_bytes{%s})' % instanceMatcher,
            legendFormat='total',
          )
        )
      )
      .addPanel(
        legendTableGraph('Cache Size (number of items)')
        .addTarget(
          prometheus.target(
            'pyroscope_storage_db_cache_size{%s}' % instanceMatcher,
            legendFormat='{{ name }}',
          )
        )
        .addTarget(
          prometheus.target(
            'sum without(name)(pyroscope_storage_db_cache_size{%s})' % instanceMatcher,
            legendFormat='total',
          )
        )
      )
    )

    // inspired by
    // https://github.com/aukhatov/grafana-dashboards/blob/master/Go%20Metrics-1567509764849.json
    .addRow(
      grafana.row.new(
        title='Go',
        // expanded only when benchmarking
        collapse=!$._config.benchmark,
      )
      .addPanel(
        withTargets(
          graphPanel.new(
            'Memory Off-heap',
            datasource=datasource,
            format='bytes',
          ),
          std.map(instanceMetric, [
            'go_memstats_mspan_inuse_bytes',
            'go_memstats_mspan_sys_bytes',
            'go_memstats_mcache_inuse_bytes',
            'go_memstats_mcache_sys_bytes',
            'go_memstats_buck_hash_sys_bytes',
            'go_memstats_gc_sys_bytes',
            'go_memstats_other_sys_bytes',
            'go_memstats_next_gc_bytes',
          ])
        )
      )
      .addPanel(
        withTargets(
          graphPanel.new(
            'Memory In Heap',
            datasource=datasource,
            format='bytes',
          ),
          std.map(instanceMetric, [
            'go_memstats_heap_alloc_bytes',
            'go_memstats_heap_sys_bytes',
            'go_memstats_heap_idle_bytes',
            'go_memstats_heap_inuse_bytes',
            'go_memstats_heap_released_bytes',
          ])
        )
      )
      .addPanel(
        withTargets(
          graphPanel.new(
            'Memory In Stack',
            datasource=datasource,
            format='decbytes',
          ),
          std.map(instanceMetric, [
            'go_memstats_stack_inuse_bytes',
            'go_memstats_stack_sys_bytes',
          ])
        )
      )
      .addPanel(
        graphPanel.new(
          'Total Used Memory',
          datasource=datasource,
          format='decbytes',
        )
        .addTarget(instanceMetric('go_memstats_sys_bytes'))
      )
      .addPanel(
        graphPanel.new(
          'Number of Live Objects',
          datasource=datasource,
          legend_show=false,
        )
        // allocations minus frees
        .addTarget(
          prometheus.target(
            'go_memstats_mallocs_total{%(m)s} - go_memstats_frees_total{%(m)s}' % { m: instanceMatcher }
          )
        )
      )
      .addPanel(
        graphPanel.new(
          'Rate of Objects Allocated',
          datasource=datasource,
          legend_show=false,
        )
        .addTarget(
          prometheus.target('rate(go_memstats_mallocs_total{%s}[$__rate_interval])' % instanceMatcher)
        )
      )
      .addPanel(
        graphPanel.new(
          'Rates of Allocation',
          datasource=datasource,
          format='Bps',
          legend_show=false,
        )
        .addTarget(
          prometheus.target('rate(go_memstats_alloc_bytes_total{%s}[$__rate_interval])' % instanceMatcher)
        )
      )
      .addPanel(
        graphPanel.new(
          'Goroutines',
          datasource=datasource,
          legend_show=false,
        )
        .addTarget(prometheus.target('go_goroutines{%s}' % instanceMatcher))
      )
      .addPanel(
        graphPanel.new(
          'GC duration quantile',
          datasource=datasource,
          legend_show=false,
        )
        .addTarget(prometheus.target('go_gc_duration_seconds{%s}' % instanceMatcher))
      )
      .addPanel(
        withTargets(
          graphPanel.new(
            'File descriptors',
            datasource=datasource,
          ),
          std.map(instanceMetric, [
            'process_open_fds',
            'process_max_fds',
          ])
        )
      )
    ),
}