volcano.sh/volcano@v1.9.0/pkg/scheduler/api/devices/nvidia/vgpu/metrics.go (about)

     1  /*
     2  Copyright 2023 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vgpu
    18  
    19  import (
    20  	"github.com/prometheus/client_golang/prometheus"
    21  	"github.com/prometheus/client_golang/prometheus/promauto" // automatically registers collectors with the default registry
    22  )
    23  
    24  const (
    25  	// VolcanoNamespace is the name prefix for Volcano's Prometheus metrics; the vGPU gauges in this file pass it as their Subsystem option.
    26  	VolcanoNamespace = "volcano"
    27  
    28  	// OnSessionOpen is a label value; no use of it is visible in this file (presumably consumed elsewhere, to be confirmed).
    29  	OnSessionOpen = "OnSessionOpen"
    30  
    31  	// OnSessionClose is a label value; no use of it is visible in this file (presumably consumed elsewhere, to be confirmed).
    32  	OnSessionClose = "OnSessionClose"
    33  )
    34  
        // Gauge vectors describing per-card vGPU usage. Each one is created through
        // promauto and carries a single label, "devID", which GetStatus fills with
        // the device UUID.
    35  var (
        	// VGPUDevicesSharedNumber reports the number of vGPU tasks sharing a card.
        	// GetStatus sets it from the device's UsedNum field.
    36  	VGPUDevicesSharedNumber = promauto.NewGaugeVec(
    37  		prometheus.GaugeOpts{
    38  			Subsystem: VolcanoNamespace,
    39  			Name:      "vgpu_device_shared_number",
    40  			Help:      "The number of vgpu tasks sharing this card",
    41  		},
    42  		[]string{"devID"},
    43  	)
        	// VGPUDevicesSharedMemory reports the vGPU memory allocated on a card.
        	// GetStatus sets it from the device's UsedMem field.
    44  	VGPUDevicesSharedMemory = promauto.NewGaugeVec(
    45  		prometheus.GaugeOpts{
    46  			Subsystem: VolcanoNamespace,
    47  			Name:      "vgpu_device_allocated_memory",
    48  			Help:      "The number of vgpu memory allocated in this card",
    49  		},
    50  		[]string{"devID"},
    51  	)
        	// VGPUDevicesSharedCores reports the percentage of GPU compute cores
        	// allocated on a card. GetStatus sets it from the device's UsedCore field.
    52  	VGPUDevicesSharedCores = promauto.NewGaugeVec(
    53  		prometheus.GaugeOpts{
    54  			Subsystem: VolcanoNamespace,
    55  			Name:      "vgpu_device_allocated_cores",
    56  			Help:      "The percentage of gpu compute cores allocated in this card",
    57  		},
    58  		[]string{"devID"},
    59  	)
        	// VGPUDevicesMemoryLimit reports the memory figure of a card.
        	// GetStatus sets it from the device's Memory field (as opposed to UsedMem,
        	// which feeds VGPUDevicesSharedMemory).
    60  	VGPUDevicesMemoryLimit = promauto.NewGaugeVec(
    61  		prometheus.GaugeOpts{
    62  			Subsystem: VolcanoNamespace,
    63  			Name:      "vgpu_device_memory_limit",
    64  			Help:      "The number of total device memory allocated in this card",
    65  		},
    66  		[]string{"devID"},
    67  	)
    68  )
    69  
    70  func (gs *GPUDevices) GetStatus() string {
    71  	for _, val := range gs.Device {
    72  		if val != nil {
    73  			VGPUDevicesSharedNumber.WithLabelValues(val.UUID).Set(float64(val.UsedNum))
    74  			VGPUDevicesSharedMemory.WithLabelValues(val.UUID).Set(float64(val.UsedMem))
    75  			VGPUDevicesMemoryLimit.WithLabelValues(val.UUID).Set(float64(val.Memory))
    76  			VGPUDevicesSharedCores.WithLabelValues(val.UUID).Set(float64(val.UsedCore))
    77  		}
    78  	}
    79  	return ""
    80  }