storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/madmin/health.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2020 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * 16 */ 17 18 package madmin 19 20 import ( 21 "context" 22 "encoding/json" 23 "io" 24 "net/http" 25 "net/url" 26 "time" 27 28 "github.com/shirou/gopsutil/v3/cpu" 29 diskhw "github.com/shirou/gopsutil/v3/disk" 30 "github.com/shirou/gopsutil/v3/host" 31 "github.com/shirou/gopsutil/v3/mem" 32 "github.com/shirou/gopsutil/v3/process" 33 34 "storj.io/minio/pkg/disk" 35 "storj.io/minio/pkg/net" 36 smart "storj.io/minio/pkg/smart" 37 ) 38 39 // HealthInfo - MinIO cluster's health Info 40 type HealthInfo struct { 41 TimeStamp time.Time `json:"timestamp,omitempty"` 42 Error string `json:"error,omitempty"` 43 Perf PerfInfo `json:"perf,omitempty"` 44 Minio MinioHealthInfo `json:"minio,omitempty"` 45 Sys SysHealthInfo `json:"sys,omitempty"` 46 } 47 48 // SysHealthInfo - Includes hardware and system information of the MinIO cluster 49 type SysHealthInfo struct { 50 CPUInfo []ServerCPUInfo `json:"cpus,omitempty"` 51 DiskHwInfo []ServerDiskHwInfo `json:"drives,omitempty"` 52 OsInfo []ServerOsInfo `json:"osinfos,omitempty"` 53 MemInfo []ServerMemInfo `json:"meminfos,omitempty"` 54 ProcInfo []ServerProcInfo `json:"procinfos,omitempty"` 55 Error string `json:"error,omitempty"` 56 } 57 58 // ServerProcInfo - Includes host process lvl information 59 type ServerProcInfo struct { 60 Addr string `json:"addr"` 61 Processes []SysProcess `json:"processes,omitempty"` 62 Error string `json:"error,omitempty"` 63 } 64 65 // SysProcess - Includes process lvl information about a single process 66 type SysProcess struct { 67 Pid int32 `json:"pid"` 68 Background bool `json:"background,omitempty"` 69 CPUPercent float64 `json:"cpupercent,omitempty"` 70 Children []int32 `json:"children,omitempty"` 71 CmdLine string `json:"cmd,omitempty"` 72 ConnectionCount int `json:"connection_count,omitempty"` 73 CreateTime int64 `json:"createtime,omitempty"` 74 Cwd string `json:"cwd,omitempty"` 75 Exe string `json:"exe,omitempty"` 76 Gids []int32 `json:"gids,omitempty"` 77 IOCounters *process.IOCountersStat `json:"iocounters,omitempty"` 78 IsRunning bool `json:"isrunning,omitempty"` 79 MemInfo *process.MemoryInfoStat `json:"meminfo,omitempty"` 80 MemMaps *[]process.MemoryMapsStat `json:"memmaps,omitempty"` 81 MemPercent float32 `json:"mempercent,omitempty"` 82 Name string `json:"name,omitempty"` 83 Nice int32 `json:"nice,omitempty"` 84 NumCtxSwitches *process.NumCtxSwitchesStat `json:"numctxswitches,omitempty"` 85 NumFds int32 `json:"numfds,omitempty"` 86 NumThreads int32 `json:"numthreads,omitempty"` 87 PageFaults *process.PageFaultsStat `json:"pagefaults,omitempty"` 88 Parent int32 `json:"parent,omitempty"` 89 Ppid int32 `json:"ppid,omitempty"` 90 Status string `json:"status,omitempty"` 91 Tgid int32 `json:"tgid,omitempty"` 92 Times *cpu.TimesStat `json:"cputimes,omitempty"` 93 Uids []int32 `json:"uids,omitempty"` 94 Username string `json:"username,omitempty"` 95 } 96 97 // ServerMemInfo - Includes host virtual and swap mem information 98 type ServerMemInfo struct { 99 Addr string `json:"addr"` 100 SwapMem *mem.SwapMemoryStat `json:"swap,omitempty"` 101 VirtualMem *mem.VirtualMemoryStat `json:"virtualmem,omitempty"` 102 Error string `json:"error,omitempty"` 103 } 104 105 // ServerOsInfo - Includes host os information 106 type ServerOsInfo struct { 107 Addr string `json:"addr"` 108 Info *host.InfoStat `json:"info,omitempty"` 109 Sensors []host.TemperatureStat `json:"sensors,omitempty"` 110 Users []host.UserStat `json:"users,omitempty"` 111 Error string `json:"error,omitempty"` 112 } 113 114 // ServerCPUInfo - Includes cpu and timer stats of each node of the MinIO cluster 115 type ServerCPUInfo struct { 116 Addr string `json:"addr"` 117 CPUStat []cpu.InfoStat `json:"cpu,omitempty"` 118 TimeStat []cpu.TimesStat `json:"time,omitempty"` 119 Error string `json:"error,omitempty"` 120 } 121 122 // MinioHealthInfo - Includes MinIO confifuration information 123 type MinioHealthInfo struct { 124 Info InfoMessage `json:"info,omitempty"` 125 Config interface{} `json:"config,omitempty"` 126 Error string `json:"error,omitempty"` 127 } 128 129 // ServerDiskHwInfo - Includes usage counters, disk counters and partitions 130 type ServerDiskHwInfo struct { 131 Addr string `json:"addr"` 132 Usage []*diskhw.UsageStat `json:"usages,omitempty"` 133 Partitions []PartitionStat `json:"partitions,omitempty"` 134 Counters map[string]diskhw.IOCountersStat `json:"counters,omitempty"` 135 Error string `json:"error,omitempty"` 136 } 137 138 // PartitionStat - includes data from both shirou/psutil.diskHw.PartitionStat as well as SMART data 139 type PartitionStat struct { 140 Device string `json:"device"` 141 Mountpoint string `json:"mountpoint,omitempty"` 142 Fstype string `json:"fstype,omitempty"` 143 Opts string `json:"opts,omitempty"` 144 SmartInfo smart.Info `json:"smartInfo,omitempty"` 145 } 146 147 // PerfInfo - Includes Drive and Net perf info for the entire MinIO cluster 148 type PerfInfo struct { 149 DriveInfo []ServerDrivesInfo `json:"drives,omitempty"` 150 Net []ServerNetHealthInfo `json:"net,omitempty"` 151 NetParallel ServerNetHealthInfo `json:"net_parallel,omitempty"` 152 Error string `json:"error,omitempty"` 153 } 154 155 // ServerDrivesInfo - Drive info about all drives in a single MinIO node 156 type ServerDrivesInfo struct { 157 Addr string `json:"addr"` 158 Serial []DrivePerfInfo `json:"serial,omitempty"` // Drive perf info collected one drive at a time 159 Parallel []DrivePerfInfo `json:"parallel,omitempty"` // Drive perf info collected in parallel 160 Error string `json:"error,omitempty"` 161 } 162 163 // DrivePerfInfo - Stats about a single drive in a MinIO node 164 type DrivePerfInfo struct { 165 Path string `json:"endpoint"` 166 Latency disk.Latency `json:"latency,omitempty"` 167 Throughput disk.Throughput `json:"throughput,omitempty"` 168 Error string `json:"error,omitempty"` 169 } 170 171 // ServerNetHealthInfo - Network health info about a single MinIO node 172 type ServerNetHealthInfo struct { 173 Addr string `json:"addr"` 174 Net []NetPerfInfo `json:"net,omitempty"` 175 Error string `json:"error,omitempty"` 176 } 177 178 // NetPerfInfo - one-to-one network connectivity Stats between 2 MinIO nodes 179 type NetPerfInfo struct { 180 Addr string `json:"remote"` 181 Latency net.Latency `json:"latency,omitempty"` 182 Throughput net.Throughput `json:"throughput,omitempty"` 183 Error string `json:"error,omitempty"` 184 } 185 186 // HealthDataType - Typed Health data types 187 type HealthDataType string 188 189 // HealthDataTypes 190 const ( 191 HealthDataTypePerfDrive HealthDataType = "perfdrive" 192 HealthDataTypePerfNet HealthDataType = "perfnet" 193 HealthDataTypeMinioInfo HealthDataType = "minioinfo" 194 HealthDataTypeMinioConfig HealthDataType = "minioconfig" 195 HealthDataTypeSysCPU HealthDataType = "syscpu" 196 HealthDataTypeSysDiskHw HealthDataType = "sysdiskhw" 197 HealthDataTypeSysDocker HealthDataType = "sysdocker" // is this really needed? 198 HealthDataTypeSysOsInfo HealthDataType = "sysosinfo" 199 HealthDataTypeSysLoad HealthDataType = "sysload" // provides very little info. Making it TBD 200 HealthDataTypeSysMem HealthDataType = "sysmem" 201 HealthDataTypeSysNet HealthDataType = "sysnet" 202 HealthDataTypeSysProcess HealthDataType = "sysprocess" 203 ) 204 205 // HealthDataTypesMap - Map of Health datatypes 206 var HealthDataTypesMap = map[string]HealthDataType{ 207 "perfdrive": HealthDataTypePerfDrive, 208 "perfnet": HealthDataTypePerfNet, 209 "minioinfo": HealthDataTypeMinioInfo, 210 "minioconfig": HealthDataTypeMinioConfig, 211 "syscpu": HealthDataTypeSysCPU, 212 "sysdiskhw": HealthDataTypeSysDiskHw, 213 "sysdocker": HealthDataTypeSysDocker, 214 "sysosinfo": HealthDataTypeSysOsInfo, 215 "sysload": HealthDataTypeSysLoad, 216 "sysmem": HealthDataTypeSysMem, 217 "sysnet": HealthDataTypeSysNet, 218 "sysprocess": HealthDataTypeSysProcess, 219 } 220 221 // HealthDataTypesList - List of Health datatypes 222 var HealthDataTypesList = []HealthDataType{ 223 HealthDataTypePerfDrive, 224 HealthDataTypePerfNet, 225 HealthDataTypeMinioInfo, 226 HealthDataTypeMinioConfig, 227 HealthDataTypeSysCPU, 228 HealthDataTypeSysDiskHw, 229 HealthDataTypeSysDocker, 230 HealthDataTypeSysOsInfo, 231 HealthDataTypeSysLoad, 232 HealthDataTypeSysMem, 233 HealthDataTypeSysNet, 234 HealthDataTypeSysProcess, 235 } 236 237 // ServerHealthInfo - Connect to a minio server and call Health Info Management API 238 // to fetch server's information represented by HealthInfo structure 239 func (adm *AdminClient) ServerHealthInfo(ctx context.Context, healthDataTypes []HealthDataType, deadline time.Duration) <-chan HealthInfo { 240 respChan := make(chan HealthInfo) 241 go func() { 242 v := url.Values{} 243 244 v.Set("deadline", 245 deadline.Truncate(1*time.Second).String()) 246 247 // start with all set to false 248 for _, d := range HealthDataTypesList { 249 v.Set(string(d), "false") 250 } 251 252 // only 'trueify' user provided values 253 for _, d := range healthDataTypes { 254 v.Set(string(d), "true") 255 } 256 var healthInfoMessage HealthInfo 257 healthInfoMessage.TimeStamp = time.Now() 258 259 resp, err := adm.executeMethod(ctx, "GET", requestData{ 260 relPath: adminAPIPrefix + "/healthinfo", 261 queryValues: v, 262 }) 263 264 defer closeResponse(resp) 265 if err != nil { 266 respChan <- HealthInfo{ 267 Error: err.Error(), 268 } 269 close(respChan) 270 return 271 } 272 273 // Check response http status code 274 if resp.StatusCode != http.StatusOK { 275 respChan <- HealthInfo{ 276 Error: httpRespToErrorResponse(resp).Error(), 277 } 278 return 279 } 280 281 // Unmarshal the server's json response 282 decoder := json.NewDecoder(resp.Body) 283 for { 284 err := decoder.Decode(&healthInfoMessage) 285 healthInfoMessage.TimeStamp = time.Now() 286 287 if err == io.EOF { 288 break 289 } 290 if err != nil { 291 respChan <- HealthInfo{ 292 Error: err.Error(), 293 } 294 } 295 respChan <- healthInfoMessage 296 } 297 298 respChan <- healthInfoMessage 299 300 if v.Get(string(HealthDataTypeMinioInfo)) == "true" { 301 info, err := adm.ServerInfo(ctx) 302 if err != nil { 303 respChan <- HealthInfo{ 304 Error: err.Error(), 305 } 306 return 307 } 308 healthInfoMessage.Minio.Info = info 309 respChan <- healthInfoMessage 310 } 311 312 close(respChan) 313 }() 314 return respChan 315 } 316 317 // GetTotalCapacity gets the total capacity a server holds. 318 func (s *ServerDiskHwInfo) GetTotalCapacity() (capacity uint64) { 319 for _, u := range s.Usage { 320 capacity += u.Total 321 } 322 return 323 } 324 325 // GetTotalFreeCapacity gets the total capacity that is free. 326 func (s *ServerDiskHwInfo) GetTotalFreeCapacity() (capacity uint64) { 327 for _, u := range s.Usage { 328 capacity += u.Free 329 } 330 return 331 } 332 333 // GetTotalUsedCapacity gets the total capacity used. 334 func (s *ServerDiskHwInfo) GetTotalUsedCapacity() (capacity uint64) { 335 for _, u := range s.Usage { 336 capacity += u.Used 337 } 338 return 339 }