//
// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

package madmin

import (
	"bufio"
	"context"
	"encoding/json"
	"errors"
	"io"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/minio/madmin-go/v3/cgroup"
	"github.com/minio/madmin-go/v3/kernel"
	"github.com/prometheus/procfs"
	"github.com/shirou/gopsutil/v3/cpu"
	"github.com/shirou/gopsutil/v3/disk"
	"github.com/shirou/gopsutil/v3/host"
	"github.com/shirou/gopsutil/v3/mem"
	"github.com/shirou/gopsutil/v3/process"
)

const (
	// HealthInfoVersion0 is version 0
	HealthInfoVersion0 = ""
	// HealthInfoVersion1 is version 1
	HealthInfoVersion1 = "1"
	// HealthInfoVersion2 is version 2
	HealthInfoVersion2 = "2"
	// HealthInfoVersion3 is version 3
	HealthInfoVersion3 = "3"
	// HealthInfoVersion is current health info version.
	HealthInfoVersion = HealthInfoVersion3
)

// Messages appended to SysErrors.Errors by GetSysErrors.
const (
	// SysErrAuditEnabled is reported when isAuditEnabled returns true.
	SysErrAuditEnabled = "audit is enabled"
	// SysErrUpdatedbInstalled is reported when an `updatedb` executable
	// is found via exec.LookPath.
	SysErrUpdatedbInstalled = "updatedb is installed"
)

// Service names and statuses used in SysService entries.
const (
	// SrvSELinux is the SysService.Name used for the SELinux entry.
	SrvSELinux = "selinux"
	// SrvNotInstalled is the SysService.Status used when
	// /etc/selinux/config does not exist.
	SrvNotInstalled = "not-installed"
)

// Filesystem locations used while looking up drive hardware information.
const (
	// sysClassBlock is the directory holding one entry per block device;
	// getDriveHwInfo reads "<sysClassBlock>/<device>/dev" from it.
	sysClassBlock = "/sys/class/block"
	// runDevDataPfx is prepended to the contents of that "dev" file to
	// form the path of the udev data file scanned for the drive model
	// and revision.
	runDevDataPfx = "/run/udev/data/b"
	// devDir is the device path prefix removed to obtain the device name.
	devDir = "/dev/"
	// devLoopDir is the prefix of devices for which GetPartitions skips
	// the hardware lookup.
	devLoopDir = "/dev/loop"
)

// NodeInfo - Interface to abstract any struct that contains address/endpoint and error fields
type NodeInfo interface {
	GetAddr() string
	SetAddr(addr string)
	SetError(err string)
}

// NodeCommon - Common fields across most node-specific health structs
type NodeCommon struct {
	Addr  string `json:"addr"`
	Error string `json:"error,omitempty"`
}

// GetAddr - return the address of the node
func (n *NodeCommon) GetAddr() string {
	return n.Addr
}

// SetAddr - set the address of the node
func (n *NodeCommon) SetAddr(addr string) {
	n.Addr = addr
}

// SetError - set the error message of the node
func (n *NodeCommon) SetError(err string) {
	n.Error = err
}

// SysErrors - contains the system setup/config issues found on a node
type SysErrors struct {
	NodeCommon

	Errors []string `json:"errors,omitempty"`
}

// SysServices - info about services that affect minio
type SysServices struct {
	NodeCommon

	Services []SysService `json:"services,omitempty"`
}

// SysConfig - system config values of a node, keyed by config name
type SysConfig struct {
	NodeCommon

	Config map[string]interface{} `json:"config,omitempty"`
}

// SysService - name and status of a sys service
type SysService struct {
	Name   string `json:"name"`
	Status string `json:"status"`
}

// CPU contains system's CPU information.
type CPU struct {
	VendorID   string   `json:"vendor_id"`
	Family     string   `json:"family"`
	Model      string   `json:"model"`
	Stepping   int32    `json:"stepping"`
	PhysicalID string   `json:"physical_id"`
	ModelName  string   `json:"model_name"`
	Mhz        float64  `json:"mhz"`
	CacheSize  int32    `json:"cache_size"`
	Flags      []string `json:"flags"`
	Microcode  string   `json:"microcode"`
	// Cores is not read from the system directly: GetCPUs counts how many
	// reported CPU entries share this PhysicalID.
	Cores int `json:"cores"` // computed
}

// CPUs contains all CPU information of a node.
type CPUs struct {
	NodeCommon

	CPUs         []CPU          `json:"cpus,omitempty"`
	CPUFreqStats []CPUFreqStats `json:"freq_stats,omitempty"`
}

// CPUFreqStats CPU frequency stats
//
// The fields carry no JSON tags, so they are serialized under their Go
// field names.
// NOTE(review): the pointer-typed fields are presumably nil when the
// corresponding value is unavailable - confirm against getCPUFreqStats.
type CPUFreqStats struct {
	Name                     string
	CpuinfoCurrentFrequency  *uint64
	CpuinfoMinimumFrequency  *uint64
	CpuinfoMaximumFrequency  *uint64
	CpuinfoTransitionLatency *uint64
	ScalingCurrentFrequency  *uint64
	ScalingMinimumFrequency  *uint64
	ScalingMaximumFrequency  *uint64
	AvailableGovernors       string
	Driver                   string
	Governor                 string
	RelatedCpus              string
	SetSpeed                 string
}

// GetCPUs returns system's all CPU information.
176 func GetCPUs(ctx context.Context, addr string) CPUs { 177 infos, err := cpu.InfoWithContext(ctx) 178 if err != nil { 179 return CPUs{ 180 NodeCommon: NodeCommon{ 181 Addr: addr, 182 Error: err.Error(), 183 }, 184 } 185 } 186 187 cpuMap := map[string]CPU{} 188 for _, info := range infos { 189 cpu, found := cpuMap[info.PhysicalID] 190 if found { 191 cpu.Cores++ 192 } else { 193 cpu = CPU{ 194 VendorID: info.VendorID, 195 Family: info.Family, 196 Model: info.Model, 197 Stepping: info.Stepping, 198 PhysicalID: info.PhysicalID, 199 ModelName: info.ModelName, 200 Mhz: info.Mhz, 201 CacheSize: info.CacheSize, 202 Flags: info.Flags, 203 Microcode: info.Microcode, 204 Cores: 1, 205 } 206 } 207 cpuMap[info.PhysicalID] = cpu 208 } 209 210 cpus := []CPU{} 211 for _, cpu := range cpuMap { 212 cpus = append(cpus, cpu) 213 } 214 215 var errMsg string 216 freqStats, err := getCPUFreqStats() 217 if err != nil { 218 errMsg = err.Error() 219 } 220 221 return CPUs{ 222 NodeCommon: NodeCommon{Addr: addr, Error: errMsg}, 223 CPUs: cpus, 224 CPUFreqStats: freqStats, 225 } 226 } 227 228 // Partition contains disk partition's information. 
229 type Partition struct { 230 Error string `json:"error,omitempty"` 231 232 Device string `json:"device,omitempty"` 233 Model string `json:"model,omitempty"` 234 Revision string `json:"revision,omitempty"` 235 Mountpoint string `json:"mountpoint,omitempty"` 236 FSType string `json:"fs_type,omitempty"` 237 MountOptions string `json:"mount_options,omitempty"` 238 MountFSType string `json:"mount_fs_type,omitempty"` 239 SpaceTotal uint64 `json:"space_total,omitempty"` 240 SpaceFree uint64 `json:"space_free,omitempty"` 241 InodeTotal uint64 `json:"inode_total,omitempty"` 242 InodeFree uint64 `json:"inode_free,omitempty"` 243 } 244 245 // NetInfo contains information about a network inerface 246 type NetInfo struct { 247 NodeCommon 248 Interface string `json:"interface,omitempty"` 249 Driver string `json:"driver,omitempty"` 250 FirmwareVersion string `json:"firmware_version,omitempty"` 251 } 252 253 // Partitions contains all disk partitions information of a node. 254 type Partitions struct { 255 NodeCommon 256 257 Partitions []Partition `json:"partitions,omitempty"` 258 } 259 260 // driveHwInfo contains hardware information about a drive 261 type driveHwInfo struct { 262 Model string 263 Revision string 264 } 265 266 func getDriveHwInfo(partDevice string) (info driveHwInfo, err error) { 267 partDevName := strings.ReplaceAll(partDevice, devDir, "") 268 devPath := path.Join(sysClassBlock, partDevName, "dev") 269 270 _, err = os.Stat(devPath) 271 if err != nil { 272 return 273 } 274 275 var data []byte 276 data, err = os.ReadFile(devPath) 277 if err != nil { 278 return 279 } 280 281 majorMinor := strings.TrimSpace(string(data)) 282 driveInfoPath := runDevDataPfx + majorMinor 283 284 var f *os.File 285 f, err = os.Open(driveInfoPath) 286 if err != nil { 287 return 288 } 289 defer f.Close() 290 291 buf := bufio.NewScanner(f) 292 for buf.Scan() { 293 field := strings.SplitN(buf.Text(), "=", 2) 294 if len(field) == 2 { 295 if field[0] == "E:ID_MODEL" { 296 info.Model = 
field[1] 297 } 298 if field[0] == "E:ID_REVISION" { 299 info.Revision = field[1] 300 } 301 if len(info.Model) > 0 && len(info.Revision) > 0 { 302 break 303 } 304 } 305 } 306 307 return 308 } 309 310 // GetPartitions returns all disk partitions information of a node running linux only operating system. 311 func GetPartitions(ctx context.Context, addr string) Partitions { 312 if runtime.GOOS != "linux" { 313 return Partitions{ 314 NodeCommon: NodeCommon{ 315 Addr: addr, 316 Error: "unsupported operating system " + runtime.GOOS, 317 }, 318 } 319 } 320 321 parts, err := disk.PartitionsWithContext(ctx, false) 322 if err != nil { 323 return Partitions{ 324 NodeCommon: NodeCommon{ 325 Addr: addr, 326 Error: err.Error(), 327 }, 328 } 329 } 330 331 partitions := []Partition{} 332 333 for i := range parts { 334 usage, err := disk.UsageWithContext(ctx, parts[i].Mountpoint) 335 if err != nil { 336 partitions = append(partitions, Partition{ 337 Device: parts[i].Device, 338 Error: err.Error(), 339 }) 340 } else { 341 var di driveHwInfo 342 device := parts[i].Device 343 if strings.HasPrefix(device, devDir) && !strings.HasPrefix(device, devLoopDir) { 344 // ignore any error in finding device model 345 di, _ = getDriveHwInfo(device) 346 } 347 348 partitions = append(partitions, Partition{ 349 Device: device, 350 Mountpoint: parts[i].Mountpoint, 351 FSType: parts[i].Fstype, 352 MountOptions: strings.Join(parts[i].Opts, ","), 353 MountFSType: usage.Fstype, 354 SpaceTotal: usage.Total, 355 SpaceFree: usage.Free, 356 InodeTotal: usage.InodesTotal, 357 InodeFree: usage.InodesFree, 358 Model: di.Model, 359 Revision: di.Revision, 360 }) 361 } 362 } 363 364 return Partitions{ 365 NodeCommon: NodeCommon{Addr: addr}, 366 Partitions: partitions, 367 } 368 } 369 370 // OSInfo contains operating system's information. 
371 type OSInfo struct { 372 NodeCommon 373 374 Info host.InfoStat `json:"info,omitempty"` 375 Sensors []host.TemperatureStat `json:"sensors,omitempty"` 376 } 377 378 // TimeInfo contains current time with timezone, and 379 // the roundtrip duration when fetching it remotely 380 type TimeInfo struct { 381 CurrentTime time.Time `json:"current_time"` 382 RoundtripDuration int32 `json:"roundtrip_duration"` 383 TimeZone string `json:"time_zone"` 384 } 385 386 // XFSErrorConfigs - stores the error configs of all XFS devices on the server 387 type XFSErrorConfigs struct { 388 Configs []XFSErrorConfig `json:"configs,omitempty"` 389 Error string `json:"error,omitempty"` 390 } 391 392 // XFSErrorConfig - stores XFS error configuration info for max_retries 393 type XFSErrorConfig struct { 394 ConfigFile string `json:"config_file"` 395 MaxRetries int `json:"max_retries"` 396 } 397 398 // GetOSInfo returns linux only operating system's information. 399 func GetOSInfo(ctx context.Context, addr string) OSInfo { 400 if runtime.GOOS != "linux" { 401 return OSInfo{ 402 NodeCommon: NodeCommon{ 403 Addr: addr, 404 Error: "unsupported operating system " + runtime.GOOS, 405 }, 406 } 407 } 408 409 kr, err := kernel.CurrentRelease() 410 if err != nil { 411 return OSInfo{ 412 NodeCommon: NodeCommon{ 413 Addr: addr, 414 Error: err.Error(), 415 }, 416 } 417 } 418 419 info, err := host.InfoWithContext(ctx) 420 if err != nil { 421 return OSInfo{ 422 NodeCommon: NodeCommon{ 423 Addr: addr, 424 Error: err.Error(), 425 }, 426 } 427 } 428 429 osInfo := OSInfo{ 430 NodeCommon: NodeCommon{Addr: addr}, 431 Info: *info, 432 } 433 osInfo.Info.KernelVersion = kr 434 435 osInfo.Sensors, _ = host.SensorsTemperaturesWithContext(ctx) 436 437 return osInfo 438 } 439 440 // GetSysConfig returns config values from the system 441 // (only those affecting minio performance) 442 func GetSysConfig(_ context.Context, addr string) SysConfig { 443 sc := SysConfig{ 444 NodeCommon: NodeCommon{Addr: addr}, 445 Config: 
map[string]interface{}{}, 446 } 447 proc, err := procfs.Self() 448 if err != nil { 449 sc.Error = "rlimit: " + err.Error() 450 } else { 451 limits, err := proc.Limits() 452 if err != nil { 453 sc.Error = "rlimit: " + err.Error() 454 } 455 sc.Config["rlimit-max"] = limits.OpenFiles 456 } 457 458 zone, _ := time.Now().Zone() 459 sc.Config["time-info"] = TimeInfo{ 460 CurrentTime: time.Now(), 461 TimeZone: zone, 462 } 463 464 xfsErrorConfigs := getXFSErrorMaxRetries() 465 if len(xfsErrorConfigs.Configs) > 0 || len(xfsErrorConfigs.Error) > 0 { 466 sc.Config["xfs-error-config"] = xfsErrorConfigs 467 } 468 469 sc.Config["thp-config"] = getTHPConfigs() 470 471 return sc 472 } 473 474 func readIntFromFile(filePath string) (num int, err error) { 475 var file *os.File 476 file, err = os.Open(filePath) 477 if err != nil { 478 return 479 } 480 defer file.Close() 481 482 var data []byte 483 data, err = io.ReadAll(file) 484 if err != nil { 485 return 486 } 487 488 return strconv.Atoi(strings.TrimSpace(string(data))) 489 } 490 491 func getTHPConfigs() map[string]string { 492 configs := map[string]string{} 493 captureTHPConfig(configs, "/sys/kernel/mm/transparent_hugepage/enabled", "enabled") 494 captureTHPConfig(configs, "/sys/kernel/mm/transparent_hugepage/defrag", "defrag") 495 captureTHPConfig(configs, "/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none", "max_ptes_none") 496 return configs 497 } 498 499 func captureTHPConfig(configs map[string]string, filePath string, cfgName string) { 500 errFieldName := cfgName + "_error" 501 data, err := os.ReadFile(filePath) 502 if err != nil { 503 configs[errFieldName] = err.Error() 504 return 505 } 506 configs[cfgName] = strings.TrimSpace(string(data)) 507 } 508 509 func getXFSErrorMaxRetries() XFSErrorConfigs { 510 xfsErrCfgPattern := "/sys/fs/xfs/*/error/metadata/*/max_retries" 511 configFiles, err := filepath.Glob(xfsErrCfgPattern) 512 if err != nil { 513 return XFSErrorConfigs{Error: err.Error()} 514 } 515 516 configs := 
[]XFSErrorConfig{} 517 var errMsg string 518 for _, configFile := range configFiles { 519 maxRetries, err := readIntFromFile(configFile) 520 if err != nil { 521 errMsg = err.Error() 522 break 523 } 524 configs = append(configs, XFSErrorConfig{ 525 ConfigFile: configFile, 526 MaxRetries: maxRetries, 527 }) 528 } 529 return XFSErrorConfigs{ 530 Configs: configs, 531 Error: errMsg, 532 } 533 } 534 535 // GetSysServices returns info of sys services that affect minio 536 func GetSysServices(_ context.Context, addr string) SysServices { 537 ss := SysServices{ 538 NodeCommon: NodeCommon{Addr: addr}, 539 Services: []SysService{}, 540 } 541 srv, e := getSELinuxInfo() 542 if e != nil { 543 ss.Error = e.Error() 544 } else { 545 ss.Services = append(ss.Services, srv) 546 } 547 548 return ss 549 } 550 551 func getSELinuxInfo() (SysService, error) { 552 ss := SysService{Name: SrvSELinux} 553 554 file, err := os.Open("/etc/selinux/config") 555 if err != nil { 556 if errors.Is(err, os.ErrNotExist) { 557 ss.Status = SrvNotInstalled 558 return ss, nil 559 } 560 return ss, err 561 } 562 defer file.Close() 563 564 scanner := bufio.NewScanner(file) 565 for scanner.Scan() { 566 tokens := strings.SplitN(strings.TrimSpace(scanner.Text()), "=", 2) 567 if len(tokens) == 2 && tokens[0] == "SELINUX" { 568 ss.Status = tokens[1] 569 return ss, nil 570 } 571 } 572 573 return ss, scanner.Err() 574 } 575 576 // GetSysErrors returns issues in system setup/config 577 func GetSysErrors(_ context.Context, addr string) SysErrors { 578 se := SysErrors{NodeCommon: NodeCommon{Addr: addr}} 579 if runtime.GOOS != "linux" { 580 return se 581 } 582 583 ae, err := isAuditEnabled() 584 if err != nil { 585 se.Error = "audit: " + err.Error() 586 } else if ae { 587 se.Errors = append(se.Errors, SysErrAuditEnabled) 588 } 589 590 _, err = exec.LookPath("updatedb") 591 if err == nil { 592 se.Errors = append(se.Errors, SysErrUpdatedbInstalled) 593 } else if !strings.HasSuffix(err.Error(), exec.ErrNotFound.Error()) { 
594 errMsg := "updatedb: " + err.Error() 595 if len(se.Error) == 0 { 596 se.Error = errMsg 597 } else { 598 se.Error = se.Error + ", " + errMsg 599 } 600 } 601 602 return se 603 } 604 605 // Audit is enabled if either `audit=1` is present in /proc/cmdline 606 // or the `kauditd` process is running 607 func isAuditEnabled() (bool, error) { 608 file, err := os.Open("/proc/cmdline") 609 if err != nil { 610 return false, err 611 } 612 defer file.Close() 613 614 scanner := bufio.NewScanner(file) 615 for scanner.Scan() { 616 if strings.Contains(scanner.Text(), "audit=1") { 617 return true, nil 618 } 619 } 620 621 return isKauditdRunning() 622 } 623 624 func isKauditdRunning() (bool, error) { 625 procs, err := process.Processes() 626 if err != nil { 627 return false, err 628 } 629 for _, proc := range procs { 630 pname, err := proc.Name() 631 if err == nil && pname == "kauditd" { 632 return true, nil 633 } 634 } 635 return false, nil 636 } 637 638 // MemInfo contains system's RAM and swap information. 639 type MemInfo struct { 640 NodeCommon 641 642 Total uint64 `json:"total,omitempty"` 643 Used uint64 `json:"used,omitempty"` 644 Free uint64 `json:"free,omitempty"` 645 Available uint64 `json:"available,omitempty"` 646 Shared uint64 `json:"shared,omitempty"` 647 Cache uint64 `json:"cache,omitempty"` 648 Buffers uint64 `json:"buffer,omitempty"` 649 SwapSpaceTotal uint64 `json:"swap_space_total,omitempty"` 650 SwapSpaceFree uint64 `json:"swap_space_free,omitempty"` 651 // Limit will store cgroup limit if configured and 652 // less than Total, otherwise same as Total 653 Limit uint64 `json:"limit,omitempty"` 654 } 655 656 // Get the final system memory limit chosen by the user. 657 // by default without any configuration on a vanilla Linux 658 // system you would see physical RAM limit. If cgroup 659 // is configured at some point in time this function 660 // would return the memory limit chosen for the given pid. 
661 func getMemoryLimit(sysLimit uint64) uint64 { 662 // Following code is deliberately ignoring the error. 663 cGroupLimit, err := cgroup.GetMemoryLimit(os.Getpid()) 664 if err == nil && cGroupLimit <= sysLimit { 665 // cgroup limit is lesser than system limit means 666 // user wants to limit the memory usage further 667 return cGroupLimit 668 } 669 670 return sysLimit 671 } 672 673 // GetMemInfo returns system's RAM and swap information. 674 func GetMemInfo(ctx context.Context, addr string) MemInfo { 675 meminfo, err := mem.VirtualMemoryWithContext(ctx) 676 if err != nil { 677 return MemInfo{ 678 NodeCommon: NodeCommon{ 679 Addr: addr, 680 Error: err.Error(), 681 }, 682 } 683 } 684 685 swapinfo, err := mem.SwapMemoryWithContext(ctx) 686 if err != nil { 687 return MemInfo{ 688 NodeCommon: NodeCommon{ 689 Addr: addr, 690 Error: err.Error(), 691 }, 692 } 693 } 694 695 return MemInfo{ 696 NodeCommon: NodeCommon{Addr: addr}, 697 Total: meminfo.Total, 698 Used: meminfo.Used, 699 Free: meminfo.Free, 700 Available: meminfo.Available, 701 Shared: meminfo.Shared, 702 Cache: meminfo.Cached, 703 Buffers: meminfo.Buffers, 704 SwapSpaceTotal: swapinfo.Total, 705 SwapSpaceFree: swapinfo.Free, 706 Limit: getMemoryLimit(meminfo.Total), 707 } 708 } 709 710 // ProcInfo contains current process's information. 
type ProcInfo struct {
	NodeCommon

	PID            int32                      `json:"pid,omitempty"`
	IsBackground   bool                       `json:"is_background,omitempty"`
	CPUPercent     float64                    `json:"cpu_percent,omitempty"`
	ChildrenPIDs   []int32                    `json:"children_pids,omitempty"`
	CmdLine        string                     `json:"cmd_line,omitempty"`
	NumConnections int                        `json:"num_connections,omitempty"`
	CreateTime     int64                      `json:"create_time,omitempty"`
	CWD            string                     `json:"cwd,omitempty"`
	ExecPath       string                     `json:"exec_path,omitempty"`
	GIDs           []int32                    `json:"gids,omitempty"`
	IOCounters     process.IOCountersStat     `json:"iocounters,omitempty"`
	IsRunning      bool                       `json:"is_running,omitempty"`
	MemInfo        process.MemoryInfoStat     `json:"mem_info,omitempty"`
	MemMaps        []process.MemoryMapsStat   `json:"mem_maps,omitempty"`
	MemPercent     float32                    `json:"mem_percent,omitempty"`
	Name           string                     `json:"name,omitempty"`
	Nice           int32                      `json:"nice,omitempty"`
	NumCtxSwitches process.NumCtxSwitchesStat `json:"num_ctx_switches,omitempty"`
	NumFDs         int32                      `json:"num_fds,omitempty"`
	NumThreads     int32                      `json:"num_threads,omitempty"`
	PageFaults     process.PageFaultsStat     `json:"page_faults,omitempty"`
	PPID           int32                      `json:"ppid,omitempty"`
	// Status holds the first status value reported for the process.
	Status string        `json:"status,omitempty"`
	TGID   int32         `json:"tgid,omitempty"`
	Times  cpu.TimesStat `json:"times,omitempty"`
	UIDs   []int32       `json:"uids,omitempty"`
	// Username is set to "<non-root>" by GetProcInfo when the user name
	// cannot be determined.
	Username string `json:"username,omitempty"`
}

// GetProcInfo returns current MinIO process information.
744 func GetProcInfo(ctx context.Context, addr string) ProcInfo { 745 pid := int32(syscall.Getpid()) 746 747 procInfo := ProcInfo{ 748 NodeCommon: NodeCommon{Addr: addr}, 749 PID: pid, 750 } 751 var err error 752 753 proc, err := process.NewProcess(pid) 754 if err != nil { 755 procInfo.Error = err.Error() 756 return procInfo 757 } 758 759 procInfo.IsBackground, err = proc.BackgroundWithContext(ctx) 760 if err != nil { 761 procInfo.Error = err.Error() 762 return procInfo 763 } 764 765 procInfo.CPUPercent, err = proc.CPUPercentWithContext(ctx) 766 if err != nil { 767 procInfo.Error = err.Error() 768 return procInfo 769 } 770 771 procInfo.ChildrenPIDs = []int32{} 772 children, _ := proc.ChildrenWithContext(ctx) 773 for i := range children { 774 procInfo.ChildrenPIDs = append(procInfo.ChildrenPIDs, children[i].Pid) 775 } 776 777 procInfo.CmdLine, err = proc.CmdlineWithContext(ctx) 778 if err != nil { 779 procInfo.Error = err.Error() 780 return procInfo 781 } 782 783 connections, err := proc.ConnectionsWithContext(ctx) 784 if err != nil { 785 procInfo.Error = err.Error() 786 return procInfo 787 } 788 procInfo.NumConnections = len(connections) 789 790 procInfo.CreateTime, err = proc.CreateTimeWithContext(ctx) 791 if err != nil { 792 procInfo.Error = err.Error() 793 return procInfo 794 } 795 796 procInfo.CWD, err = proc.CwdWithContext(ctx) 797 if err != nil { 798 procInfo.Error = err.Error() 799 return procInfo 800 } 801 802 procInfo.ExecPath, err = proc.ExeWithContext(ctx) 803 if err != nil { 804 procInfo.Error = err.Error() 805 return procInfo 806 } 807 808 procInfo.GIDs, err = proc.GidsWithContext(ctx) 809 if err != nil { 810 procInfo.Error = err.Error() 811 return procInfo 812 } 813 814 ioCounters, err := proc.IOCountersWithContext(ctx) 815 if err != nil { 816 procInfo.Error = err.Error() 817 return procInfo 818 } 819 procInfo.IOCounters = *ioCounters 820 821 procInfo.IsRunning, err = proc.IsRunningWithContext(ctx) 822 if err != nil { 823 procInfo.Error = err.Error() 
824 return procInfo 825 } 826 827 memInfo, err := proc.MemoryInfoWithContext(ctx) 828 if err != nil { 829 procInfo.Error = err.Error() 830 return procInfo 831 } 832 procInfo.MemInfo = *memInfo 833 834 memMaps, err := proc.MemoryMapsWithContext(ctx, true) 835 if err != nil { 836 procInfo.Error = err.Error() 837 return procInfo 838 } 839 procInfo.MemMaps = *memMaps 840 841 procInfo.MemPercent, err = proc.MemoryPercentWithContext(ctx) 842 if err != nil { 843 procInfo.Error = err.Error() 844 return procInfo 845 } 846 847 procInfo.Name, err = proc.NameWithContext(ctx) 848 if err != nil { 849 procInfo.Error = err.Error() 850 return procInfo 851 } 852 853 procInfo.Nice, err = proc.NiceWithContext(ctx) 854 if err != nil { 855 procInfo.Error = err.Error() 856 return procInfo 857 } 858 859 numCtxSwitches, err := proc.NumCtxSwitchesWithContext(ctx) 860 if err != nil { 861 procInfo.Error = err.Error() 862 return procInfo 863 } 864 procInfo.NumCtxSwitches = *numCtxSwitches 865 866 procInfo.NumFDs, err = proc.NumFDsWithContext(ctx) 867 if err != nil { 868 procInfo.Error = err.Error() 869 return procInfo 870 } 871 872 procInfo.NumThreads, err = proc.NumThreadsWithContext(ctx) 873 if err != nil { 874 procInfo.Error = err.Error() 875 return procInfo 876 } 877 878 pageFaults, err := proc.PageFaultsWithContext(ctx) 879 if err != nil { 880 procInfo.Error = err.Error() 881 return procInfo 882 } 883 procInfo.PageFaults = *pageFaults 884 885 procInfo.PPID, _ = proc.PpidWithContext(ctx) 886 887 status, err := proc.StatusWithContext(ctx) 888 if err != nil { 889 procInfo.Error = err.Error() 890 return procInfo 891 } 892 procInfo.Status = status[0] 893 894 procInfo.TGID, err = proc.Tgid() 895 if err != nil { 896 procInfo.Error = err.Error() 897 return procInfo 898 } 899 900 times, err := proc.TimesWithContext(ctx) 901 if err != nil { 902 procInfo.Error = err.Error() 903 return procInfo 904 } 905 procInfo.Times = *times 906 907 procInfo.UIDs, err = proc.UidsWithContext(ctx) 908 if err != nil 
{ 909 procInfo.Error = err.Error() 910 return procInfo 911 } 912 913 // In certain environments, it is not possible to get username e.g. minio-operator 914 // Plus it's not a serious error. So ignore error if any. 915 procInfo.Username, err = proc.UsernameWithContext(ctx) 916 if err != nil { 917 procInfo.Username = "<non-root>" 918 } 919 920 return procInfo 921 } 922 923 // SysInfo - Includes hardware and system information of the MinIO cluster 924 type SysInfo struct { 925 CPUInfo []CPUs `json:"cpus,omitempty"` 926 Partitions []Partitions `json:"partitions,omitempty"` 927 OSInfo []OSInfo `json:"osinfo,omitempty"` 928 MemInfo []MemInfo `json:"meminfo,omitempty"` 929 ProcInfo []ProcInfo `json:"procinfo,omitempty"` 930 NetInfo []NetInfo `json:"netinfo,omitempty"` 931 SysErrs []SysErrors `json:"errors,omitempty"` 932 SysServices []SysServices `json:"services,omitempty"` 933 SysConfig []SysConfig `json:"config,omitempty"` 934 KubernetesInfo KubernetesInfo `json:"kubernetes"` 935 } 936 937 // KubernetesInfo - Information about the kubernetes platform 938 type KubernetesInfo struct { 939 Major string `json:"major,omitempty"` 940 Minor string `json:"minor,omitempty"` 941 GitVersion string `json:"gitVersion,omitempty"` 942 GitCommit string `json:"gitCommit,omitempty"` 943 BuildDate time.Time `json:"buildDate,omitempty"` 944 Platform string `json:"platform,omitempty"` 945 Error string `json:"error,omitempty"` 946 } 947 948 // SpeedTestResults - Includes perf test results of the MinIO cluster 949 type SpeedTestResults struct { 950 DrivePerf []DriveSpeedTestResult `json:"drive,omitempty"` 951 ObjPerf []SpeedTestResult `json:"obj,omitempty"` 952 NetPerf []NetperfNodeResult `json:"net,omitempty"` 953 Error string `json:"error,omitempty"` 954 } 955 956 // MinioConfig contains minio configuration of a node. 
type MinioConfig struct {
	Error string `json:"error,omitempty"`

	Config interface{} `json:"config,omitempty"`
}

// MemStats is strip down version of runtime.MemStats containing memory stats of MinIO server.
//
// The fields carry no JSON tags, so they are serialized under their Go
// field names.
type MemStats struct {
	Alloc      uint64
	TotalAlloc uint64
	Mallocs    uint64
	Frees      uint64
	HeapAlloc  uint64
}

// GCStats collect information about recent garbage collections.
type GCStats struct {
	LastGC     time.Time       `json:"last_gc"`     // time of last collection
	NumGC      int64           `json:"num_gc"`      // number of garbage collections
	PauseTotal time.Duration   `json:"pause_total"` // total pause for all collections
	Pause      []time.Duration `json:"pause"`       // pause history, most recent first
	PauseEnd   []time.Time     `json:"pause_end"`   // pause end times history, most recent first
}

// ServerInfo holds server information
type ServerInfo struct {
	State          string            `json:"state,omitempty"`
	Endpoint       string            `json:"endpoint,omitempty"`
	Uptime         int64             `json:"uptime,omitempty"`
	Version        string            `json:"version,omitempty"`
	CommitID       string            `json:"commitID,omitempty"`
	Network        map[string]string `json:"network,omitempty"`
	Drives         []Disk            `json:"drives,omitempty"`
	PoolNumber     int               `json:"poolNumber,omitempty"` // Only set if len(PoolNumbers) == 1
	PoolNumbers    []int             `json:"poolNumbers,omitempty"`
	MemStats       MemStats          `json:"mem_stats"`
	GoMaxProcs     int               `json:"go_max_procs"`
	NumCPU         int               `json:"num_cpu"`
	RuntimeVersion string            `json:"runtime_version"`
	GCStats        *GCStats          `json:"gc_stats,omitempty"`
	MinioEnvVars   map[string]string `json:"minio_env_vars,omitempty"`
}

// MinioInfo contains MinIO server and object storage information.
type MinioInfo struct {
	Mode         string           `json:"mode,omitempty"`
	Domain       []string         `json:"domain,omitempty"`
	Region       string           `json:"region,omitempty"`
	SQSARN       []string         `json:"sqsARN,omitempty"`
	DeploymentID string           `json:"deploymentID,omitempty"`
	Buckets      Buckets          `json:"buckets,omitempty"`
	Objects      Objects          `json:"objects,omitempty"`
	Usage        Usage            `json:"usage,omitempty"`
	Services     Services         `json:"services,omitempty"`
	Backend      interface{}      `json:"backend,omitempty"`
	Servers      []ServerInfo     `json:"servers,omitempty"`
	TLS          *TLSInfo         `json:"tls"`
	IsKubernetes *bool            `json:"is_kubernetes"`
	IsDocker     *bool            `json:"is_docker"`
	Metrics      *RealtimeMetrics `json:"metrics,omitempty"`
}

// TLSInfo holds whether TLS is enabled and the certificates in use.
type TLSInfo struct {
	TLSEnabled bool      `json:"tls_enabled"`
	Certs      []TLSCert `json:"certs,omitempty"`
}

// TLSCert holds the algorithms, validity period and checksum of a
// TLS certificate.
type TLSCert struct {
	PubKeyAlgo    string    `json:"pub_key_algo"`
	SignatureAlgo string    `json:"signature_algo"`
	NotBefore     time.Time `json:"not_before"`
	NotAfter      time.Time `json:"not_after"`
	Checksum      string    `json:"checksum"`
}

// MinioHealthInfo - Includes MinIO configuration information
type MinioHealthInfo struct {
	Error string `json:"error,omitempty"`

	Config MinioConfig `json:"config,omitempty"`
	Info   MinioInfo   `json:"info,omitempty"`
}

// HealthInfo - MinIO cluster's health Info
type HealthInfo struct {
	Version string `json:"version"`
	Error   string `json:"error,omitempty"`

	TimeStamp time.Time       `json:"timestamp,omitempty"`
	Sys       SysInfo         `json:"sys,omitempty"`
	Minio     MinioHealthInfo `json:"minio,omitempty"`
}

// String returns this structure as a compact JSON string.
// It panics if the structure cannot be marshaled.
func (info HealthInfo) String() string {
	data, err := json.Marshal(info)
	if err != nil {
		panic(err) // This never happens.
	}
	return string(data)
}

// JSON returns this structure as JSON formatted string.
1059 func (info HealthInfo) JSON() string { 1060 data, err := json.MarshalIndent(info, " ", " ") 1061 if err != nil { 1062 panic(err) // This never happens. 1063 } 1064 return string(data) 1065 } 1066 1067 // GetError - returns error from the cluster health info 1068 func (info HealthInfo) GetError() string { 1069 return info.Error 1070 } 1071 1072 // GetStatus - returns status of the cluster health info 1073 func (info HealthInfo) GetStatus() string { 1074 if info.Error != "" { 1075 return "error" 1076 } 1077 return "success" 1078 } 1079 1080 // GetTimestamp - returns timestamp from the cluster health info 1081 func (info HealthInfo) GetTimestamp() time.Time { 1082 return info.TimeStamp 1083 } 1084 1085 // HealthDataType - Typed Health data types 1086 type HealthDataType string 1087 1088 // HealthDataTypes 1089 const ( 1090 HealthDataTypeMinioInfo HealthDataType = "minioinfo" 1091 HealthDataTypeMinioConfig HealthDataType = "minioconfig" 1092 HealthDataTypeSysCPU HealthDataType = "syscpu" 1093 HealthDataTypeSysDriveHw HealthDataType = "sysdrivehw" 1094 HealthDataTypeSysDocker HealthDataType = "sysdocker" // is this really needed? 1095 HealthDataTypeSysOsInfo HealthDataType = "sysosinfo" 1096 HealthDataTypeSysLoad HealthDataType = "sysload" // provides very little info. 
Making it TBD 1097 HealthDataTypeSysMem HealthDataType = "sysmem" 1098 HealthDataTypeSysNet HealthDataType = "sysnet" 1099 HealthDataTypeSysProcess HealthDataType = "sysprocess" 1100 HealthDataTypeSysErrors HealthDataType = "syserrors" 1101 HealthDataTypeSysServices HealthDataType = "sysservices" 1102 HealthDataTypeSysConfig HealthDataType = "sysconfig" 1103 ) 1104 1105 // HealthDataTypesMap - Map of Health datatypes 1106 var HealthDataTypesMap = map[string]HealthDataType{ 1107 "minioinfo": HealthDataTypeMinioInfo, 1108 "minioconfig": HealthDataTypeMinioConfig, 1109 "syscpu": HealthDataTypeSysCPU, 1110 "sysdrivehw": HealthDataTypeSysDriveHw, 1111 "sysdocker": HealthDataTypeSysDocker, 1112 "sysosinfo": HealthDataTypeSysOsInfo, 1113 "sysload": HealthDataTypeSysLoad, 1114 "sysmem": HealthDataTypeSysMem, 1115 "sysnet": HealthDataTypeSysNet, 1116 "sysprocess": HealthDataTypeSysProcess, 1117 "syserrors": HealthDataTypeSysErrors, 1118 "sysservices": HealthDataTypeSysServices, 1119 "sysconfig": HealthDataTypeSysConfig, 1120 } 1121 1122 // HealthDataTypesList - List of health datatypes 1123 var HealthDataTypesList = []HealthDataType{ 1124 HealthDataTypeMinioInfo, 1125 HealthDataTypeMinioConfig, 1126 HealthDataTypeSysCPU, 1127 HealthDataTypeSysDriveHw, 1128 HealthDataTypeSysDocker, 1129 HealthDataTypeSysOsInfo, 1130 HealthDataTypeSysLoad, 1131 HealthDataTypeSysMem, 1132 HealthDataTypeSysNet, 1133 HealthDataTypeSysProcess, 1134 HealthDataTypeSysErrors, 1135 HealthDataTypeSysServices, 1136 HealthDataTypeSysConfig, 1137 } 1138 1139 // HealthInfoVersionStruct - struct for health info version 1140 type HealthInfoVersionStruct struct { 1141 Version string `json:"version,omitempty"` 1142 Error string `json:"error,omitempty"` 1143 } 1144 1145 // ServerHealthInfo - Connect to a minio server and call Health Info Management API 1146 // to fetch server's information represented by HealthInfo structure 1147 func (adm *AdminClient) ServerHealthInfo(ctx context.Context, types 
[]HealthDataType, deadline time.Duration, anonymize string) (*http.Response, string, error) { 1148 v := url.Values{} 1149 v.Set("deadline", deadline.Truncate(1*time.Second).String()) 1150 v.Set("anonymize", anonymize) 1151 for _, d := range HealthDataTypesList { // Init all parameters to false. 1152 v.Set(string(d), "false") 1153 } 1154 for _, d := range types { 1155 v.Set(string(d), "true") 1156 } 1157 1158 resp, err := adm.executeMethod( 1159 ctx, "GET", requestData{ 1160 relPath: adminAPIPrefix + "/healthinfo", 1161 queryValues: v, 1162 }, 1163 ) 1164 if err != nil { 1165 closeResponse(resp) 1166 return nil, "", err 1167 } 1168 1169 if resp.StatusCode != http.StatusOK { 1170 closeResponse(resp) 1171 return nil, "", httpRespToErrorResponse(resp) 1172 } 1173 1174 decoder := json.NewDecoder(resp.Body) 1175 var version HealthInfoVersionStruct 1176 if err = decoder.Decode(&version); err != nil { 1177 closeResponse(resp) 1178 return nil, "", err 1179 } 1180 1181 if version.Error != "" { 1182 closeResponse(resp) 1183 return nil, "", errors.New(version.Error) 1184 } 1185 1186 switch version.Version { 1187 case "", HealthInfoVersion2, HealthInfoVersion: 1188 default: 1189 closeResponse(resp) 1190 return nil, "", errors.New("Upgrade Minio Client to support health info version " + version.Version) 1191 } 1192 1193 return resp, version.Version, nil 1194 }