storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/madmin/health.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   *
    16   */
    17  
    18  package madmin
    19  
    20  import (
    21  	"context"
    22  	"encoding/json"
    23  	"io"
    24  	"net/http"
    25  	"net/url"
    26  	"time"
    27  
    28  	"github.com/shirou/gopsutil/v3/cpu"
    29  	diskhw "github.com/shirou/gopsutil/v3/disk"
    30  	"github.com/shirou/gopsutil/v3/host"
    31  	"github.com/shirou/gopsutil/v3/mem"
    32  	"github.com/shirou/gopsutil/v3/process"
    33  
    34  	"storj.io/minio/pkg/disk"
    35  	"storj.io/minio/pkg/net"
    36  	smart "storj.io/minio/pkg/smart"
    37  )
    38  
// HealthInfo is one health report of a MinIO cluster, as delivered on the
// channel returned by ServerHealthInfo.
//
// TimeStamp is stamped by the client (time.Now) whenever a message is
// decoded, and Error carries a failure description when a report could not
// be obtained. Perf, Minio and Sys group the performance, MinIO-level and
// system-level sections of the report.
//
// NOTE: TimeStamp, Perf, Minio and Sys are struct values; encoding/json's
// omitempty option never omits structs, so these keys are always emitted.
type HealthInfo struct {
	TimeStamp time.Time       `json:"timestamp,omitempty"`
	Error     string          `json:"error,omitempty"`
	Perf      PerfInfo        `json:"perf,omitempty"`
	Minio     MinioHealthInfo `json:"minio,omitempty"`
	Sys       SysHealthInfo   `json:"sys,omitempty"`
}
    47  
// SysHealthInfo groups the hardware and operating-system information of the
// MinIO cluster: CPU, disk hardware, OS, memory and process data, each as a
// slice of per-server entries (every entry type carries its own Addr and
// Error). Error holds a failure description for the section as a whole.
//
// Note the JSON keys differ from the Go field names: CPUInfo is "cpus",
// DiskHwInfo is "drives", OsInfo is "osinfos", MemInfo is "meminfos" and
// ProcInfo is "procinfos".
type SysHealthInfo struct {
	CPUInfo    []ServerCPUInfo    `json:"cpus,omitempty"`
	DiskHwInfo []ServerDiskHwInfo `json:"drives,omitempty"`
	OsInfo     []ServerOsInfo     `json:"osinfos,omitempty"`
	MemInfo    []ServerMemInfo    `json:"meminfos,omitempty"`
	ProcInfo   []ServerProcInfo   `json:"procinfos,omitempty"`
	Error      string             `json:"error,omitempty"`
}
    57  
// ServerProcInfo holds host process-level information for one server.
// Addr identifies the server and is always serialized; Processes lists the
// reported processes and Error carries a failure description, both omitted
// from JSON when empty.
type ServerProcInfo struct {
	Addr      string       `json:"addr"`
	Processes []SysProcess `json:"processes,omitempty"`
	Error     string       `json:"error,omitempty"`
}
    64  
// SysProcess holds process-level information about a single process.
//
// Pid is always serialized; every other field is omitted from JSON when it
// holds its zero value. The pointer-typed fields (IOCounters, MemInfo,
// MemMaps, NumCtxSwitches, PageFaults, Times) use stat types from gopsutil's
// process and cpu packages and are nil when absent.
//
// NOTE(review): the field names mirror accessors of gopsutil's
// process.Process, so they are presumably populated from it — the
// collection code is not part of this file; confirm there.
//
// Note the JSON keys that differ from the lower-cased field name: CmdLine is
// "cmd", ConnectionCount is "connection_count" and Times is "cputimes".
type SysProcess struct {
	Pid             int32                       `json:"pid"`
	Background      bool                        `json:"background,omitempty"`
	CPUPercent      float64                     `json:"cpupercent,omitempty"`
	Children        []int32                     `json:"children,omitempty"`
	CmdLine         string                      `json:"cmd,omitempty"`
	ConnectionCount int                         `json:"connection_count,omitempty"`
	CreateTime      int64                       `json:"createtime,omitempty"`
	Cwd             string                      `json:"cwd,omitempty"`
	Exe             string                      `json:"exe,omitempty"`
	Gids            []int32                     `json:"gids,omitempty"`
	IOCounters      *process.IOCountersStat     `json:"iocounters,omitempty"`
	IsRunning       bool                        `json:"isrunning,omitempty"`
	MemInfo         *process.MemoryInfoStat     `json:"meminfo,omitempty"`
	MemMaps         *[]process.MemoryMapsStat   `json:"memmaps,omitempty"`
	MemPercent      float32                     `json:"mempercent,omitempty"`
	Name            string                      `json:"name,omitempty"`
	Nice            int32                       `json:"nice,omitempty"`
	NumCtxSwitches  *process.NumCtxSwitchesStat `json:"numctxswitches,omitempty"`
	NumFds          int32                       `json:"numfds,omitempty"`
	NumThreads      int32                       `json:"numthreads,omitempty"`
	PageFaults      *process.PageFaultsStat     `json:"pagefaults,omitempty"`
	Parent          int32                       `json:"parent,omitempty"`
	Ppid            int32                       `json:"ppid,omitempty"`
	Status          string                      `json:"status,omitempty"`
	Tgid            int32                       `json:"tgid,omitempty"`
	Times           *cpu.TimesStat              `json:"cputimes,omitempty"`
	Uids            []int32                     `json:"uids,omitempty"`
	Username        string                      `json:"username,omitempty"`
}
    96  
// ServerMemInfo holds the swap and virtual memory statistics of one server,
// using gopsutil's mem stat types. Addr identifies the server and is always
// serialized; SwapMem and VirtualMem are nil (and omitted from JSON) when
// absent, and Error carries a failure description.
type ServerMemInfo struct {
	Addr       string                 `json:"addr"`
	SwapMem    *mem.SwapMemoryStat    `json:"swap,omitempty"`
	VirtualMem *mem.VirtualMemoryStat `json:"virtualmem,omitempty"`
	Error      string                 `json:"error,omitempty"`
}
   104  
// ServerOsInfo holds host operating-system information for one server, using
// gopsutil's host stat types: general host info, temperature sensor readings
// and user entries. Addr identifies the server and is always serialized; the
// remaining fields are omitted from JSON when empty, and Error carries a
// failure description.
type ServerOsInfo struct {
	Addr    string                 `json:"addr"`
	Info    *host.InfoStat         `json:"info,omitempty"`
	Sensors []host.TemperatureStat `json:"sensors,omitempty"`
	Users   []host.UserStat        `json:"users,omitempty"`
	Error   string                 `json:"error,omitempty"`
}
   113  
// ServerCPUInfo holds the CPU information (CPUStat, JSON key "cpu") and CPU
// time statistics (TimeStat, JSON key "time") of one node of the MinIO
// cluster, using gopsutil's cpu stat types. Addr identifies the node and is
// always serialized; Error carries a failure description.
type ServerCPUInfo struct {
	Addr     string          `json:"addr"`
	CPUStat  []cpu.InfoStat  `json:"cpu,omitempty"`
	TimeStat []cpu.TimesStat `json:"time,omitempty"`
	Error    string          `json:"error,omitempty"`
}
   121  
// MinioHealthInfo holds MinIO-level information: the server info message
// (Info) and the MinIO configuration (Config).
//
// Config is an untyped value, so after JSON decoding it holds whatever
// generic representation encoding/json produces for the payload. Info is
// additionally filled in by ServerHealthInfo from ServerInfo when the
// "minioinfo" data type was requested. Error carries a failure description.
type MinioHealthInfo struct {
	Info   InfoMessage `json:"info,omitempty"`
	Config interface{} `json:"config,omitempty"`
	Error  string      `json:"error,omitempty"`
}
   128  
// ServerDiskHwInfo holds disk hardware information for one server: usage
// statistics, partitions and I/O counters.
//
// Usage (JSON key "usages") is a slice of pointers to gopsutil usage stats
// and is what GetTotalCapacity, GetTotalFreeCapacity and
// GetTotalUsedCapacity sum over. Counters is a map of gopsutil I/O counter
// stats keyed by string (NOTE(review): presumably the device name — confirm
// against the collector). Addr identifies the server and is always
// serialized; Error carries a failure description.
type ServerDiskHwInfo struct {
	Addr       string                           `json:"addr"`
	Usage      []*diskhw.UsageStat              `json:"usages,omitempty"`
	Partitions []PartitionStat                  `json:"partitions,omitempty"`
	Counters   map[string]diskhw.IOCountersStat `json:"counters,omitempty"`
	Error      string                           `json:"error,omitempty"`
}
   137  
// PartitionStat describes one disk partition. It combines the partition
// fields of gopsutil's disk.PartitionStat (device, mount point, filesystem
// type and mount options) with SMART data for the device.
//
// Device is always serialized; the string fields are omitted from JSON when
// empty. SmartInfo is serialized under the camel-case key "smartInfo".
// NOTE(review): if smart.Info is a struct, omitempty has no effect on it —
// confirm against the smart package.
type PartitionStat struct {
	Device     string     `json:"device"`
	Mountpoint string     `json:"mountpoint,omitempty"`
	Fstype     string     `json:"fstype,omitempty"`
	Opts       string     `json:"opts,omitempty"`
	SmartInfo  smart.Info `json:"smartInfo,omitempty"`
}
   146  
// PerfInfo holds the drive and network performance information for the
// entire MinIO cluster: per-server drive results (DriveInfo, JSON key
// "drives"), per-server network results (Net) and a single network result
// under the key "net_parallel" (NetParallel). Error carries a failure
// description.
//
// NOTE: NetParallel is a struct value; encoding/json's omitempty option
// never omits structs, so "net_parallel" is always emitted.
type PerfInfo struct {
	DriveInfo   []ServerDrivesInfo    `json:"drives,omitempty"`
	Net         []ServerNetHealthInfo `json:"net,omitempty"`
	NetParallel ServerNetHealthInfo   `json:"net_parallel,omitempty"`
	Error       string                `json:"error,omitempty"`
}
   154  
// ServerDrivesInfo holds drive performance information about all drives in
// a single MinIO node, measured in two modes: one drive at a time (Serial)
// and all drives at once (Parallel). Addr identifies the node and is always
// serialized; Error carries a failure description.
type ServerDrivesInfo struct {
	Addr     string          `json:"addr"`
	Serial   []DrivePerfInfo `json:"serial,omitempty"`   // Drive perf info collected one drive at a time
	Parallel []DrivePerfInfo `json:"parallel,omitempty"` // Drive perf info collected in parallel
	Error    string          `json:"error,omitempty"`
}
   162  
// DrivePerfInfo holds the performance statistics of a single drive in a
// MinIO node: its latency and throughput as defined by the disk package.
//
// Path is always serialized, under the JSON key "endpoint" (not "path").
// Error carries a failure description for this drive.
type DrivePerfInfo struct {
	Path       string          `json:"endpoint"`
	Latency    disk.Latency    `json:"latency,omitempty"`
	Throughput disk.Throughput `json:"throughput,omitempty"`
	Error      string          `json:"error,omitempty"`
}
   170  
// ServerNetHealthInfo holds the network health information of a single MinIO
// node: a list of NetPerfInfo entries, each describing connectivity to one
// remote. Addr identifies the node and is always serialized; Error carries a
// failure description.
type ServerNetHealthInfo struct {
	Addr  string        `json:"addr"`
	Net   []NetPerfInfo `json:"net,omitempty"`
	Error string        `json:"error,omitempty"`
}
   177  
// NetPerfInfo holds one-to-one network connectivity statistics between two
// MinIO nodes: latency and throughput as defined by the net package.
//
// Addr is the address of the remote node and is always serialized, under the
// JSON key "remote" (not "addr"). Error carries a failure description for
// this pair.
type NetPerfInfo struct {
	Addr       string         `json:"remote"`
	Latency    net.Latency    `json:"latency,omitempty"`
	Throughput net.Throughput `json:"throughput,omitempty"`
	Error      string         `json:"error,omitempty"`
}
   185  
// HealthDataType is the typed name of one category of health data. Its
// string value is used verbatim as a query parameter name by
// ServerHealthInfo.
type HealthDataType string
   188  
// The known health data types. ServerHealthInfo sends every type listed in
// HealthDataTypesList as a query parameter, set to "true" only for the types
// the caller asked for, so the string values here are part of the wire
// protocol and must not be changed.
const (
	HealthDataTypePerfDrive   HealthDataType = "perfdrive"
	HealthDataTypePerfNet     HealthDataType = "perfnet"
	HealthDataTypeMinioInfo   HealthDataType = "minioinfo"
	HealthDataTypeMinioConfig HealthDataType = "minioconfig"
	HealthDataTypeSysCPU      HealthDataType = "syscpu"
	HealthDataTypeSysDiskHw   HealthDataType = "sysdiskhw"
	HealthDataTypeSysDocker   HealthDataType = "sysdocker" // is this really needed?
	HealthDataTypeSysOsInfo   HealthDataType = "sysosinfo"
	HealthDataTypeSysLoad     HealthDataType = "sysload" // provides very little info. Making it TBD
	HealthDataTypeSysMem      HealthDataType = "sysmem"
	HealthDataTypeSysNet      HealthDataType = "sysnet"
	HealthDataTypeSysProcess  HealthDataType = "sysprocess"
)
   204  
   205  // HealthDataTypesMap - Map of Health datatypes
   206  var HealthDataTypesMap = map[string]HealthDataType{
   207  	"perfdrive":   HealthDataTypePerfDrive,
   208  	"perfnet":     HealthDataTypePerfNet,
   209  	"minioinfo":   HealthDataTypeMinioInfo,
   210  	"minioconfig": HealthDataTypeMinioConfig,
   211  	"syscpu":      HealthDataTypeSysCPU,
   212  	"sysdiskhw":   HealthDataTypeSysDiskHw,
   213  	"sysdocker":   HealthDataTypeSysDocker,
   214  	"sysosinfo":   HealthDataTypeSysOsInfo,
   215  	"sysload":     HealthDataTypeSysLoad,
   216  	"sysmem":      HealthDataTypeSysMem,
   217  	"sysnet":      HealthDataTypeSysNet,
   218  	"sysprocess":  HealthDataTypeSysProcess,
   219  }
   220  
// HealthDataTypesList lists every known health data type. ServerHealthInfo
// iterates over it to initialise each type's query parameter to "false"
// before enabling the ones the caller requested.
var HealthDataTypesList = []HealthDataType{
	HealthDataTypePerfDrive,
	HealthDataTypePerfNet,
	HealthDataTypeMinioInfo,
	HealthDataTypeMinioConfig,
	HealthDataTypeSysCPU,
	HealthDataTypeSysDiskHw,
	HealthDataTypeSysDocker,
	HealthDataTypeSysOsInfo,
	HealthDataTypeSysLoad,
	HealthDataTypeSysMem,
	HealthDataTypeSysNet,
	HealthDataTypeSysProcess,
}
   236  
   237  // ServerHealthInfo - Connect to a minio server and call Health Info Management API
   238  // to fetch server's information represented by HealthInfo structure
   239  func (adm *AdminClient) ServerHealthInfo(ctx context.Context, healthDataTypes []HealthDataType, deadline time.Duration) <-chan HealthInfo {
   240  	respChan := make(chan HealthInfo)
   241  	go func() {
   242  		v := url.Values{}
   243  
   244  		v.Set("deadline",
   245  			deadline.Truncate(1*time.Second).String())
   246  
   247  		// start with all set to false
   248  		for _, d := range HealthDataTypesList {
   249  			v.Set(string(d), "false")
   250  		}
   251  
   252  		// only 'trueify' user provided values
   253  		for _, d := range healthDataTypes {
   254  			v.Set(string(d), "true")
   255  		}
   256  		var healthInfoMessage HealthInfo
   257  		healthInfoMessage.TimeStamp = time.Now()
   258  
   259  		resp, err := adm.executeMethod(ctx, "GET", requestData{
   260  			relPath:     adminAPIPrefix + "/healthinfo",
   261  			queryValues: v,
   262  		})
   263  
   264  		defer closeResponse(resp)
   265  		if err != nil {
   266  			respChan <- HealthInfo{
   267  				Error: err.Error(),
   268  			}
   269  			close(respChan)
   270  			return
   271  		}
   272  
   273  		// Check response http status code
   274  		if resp.StatusCode != http.StatusOK {
   275  			respChan <- HealthInfo{
   276  				Error: httpRespToErrorResponse(resp).Error(),
   277  			}
   278  			return
   279  		}
   280  
   281  		// Unmarshal the server's json response
   282  		decoder := json.NewDecoder(resp.Body)
   283  		for {
   284  			err := decoder.Decode(&healthInfoMessage)
   285  			healthInfoMessage.TimeStamp = time.Now()
   286  
   287  			if err == io.EOF {
   288  				break
   289  			}
   290  			if err != nil {
   291  				respChan <- HealthInfo{
   292  					Error: err.Error(),
   293  				}
   294  			}
   295  			respChan <- healthInfoMessage
   296  		}
   297  
   298  		respChan <- healthInfoMessage
   299  
   300  		if v.Get(string(HealthDataTypeMinioInfo)) == "true" {
   301  			info, err := adm.ServerInfo(ctx)
   302  			if err != nil {
   303  				respChan <- HealthInfo{
   304  					Error: err.Error(),
   305  				}
   306  				return
   307  			}
   308  			healthInfoMessage.Minio.Info = info
   309  			respChan <- healthInfoMessage
   310  		}
   311  
   312  		close(respChan)
   313  	}()
   314  	return respChan
   315  }
   316  
   317  // GetTotalCapacity gets the total capacity a server holds.
   318  func (s *ServerDiskHwInfo) GetTotalCapacity() (capacity uint64) {
   319  	for _, u := range s.Usage {
   320  		capacity += u.Total
   321  	}
   322  	return
   323  }
   324  
   325  // GetTotalFreeCapacity gets the total capacity that is free.
   326  func (s *ServerDiskHwInfo) GetTotalFreeCapacity() (capacity uint64) {
   327  	for _, u := range s.Usage {
   328  		capacity += u.Free
   329  	}
   330  	return
   331  }
   332  
   333  // GetTotalUsedCapacity gets the total capacity used.
   334  func (s *ServerDiskHwInfo) GetTotalUsedCapacity() (capacity uint64) {
   335  	for _, u := range s.Usage {
   336  		capacity += u.Used
   337  	}
   338  	return
   339  }