bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/processes_windows.go (about) 1 package collectors 2 3 import ( 4 "fmt" 5 "os" 6 "regexp" 7 "strings" 8 9 "bosun.org/cmd/scollector/conf" 10 "bosun.org/metadata" 11 "bosun.org/opentsdb" 12 "bosun.org/util" 13 "github.com/StackExchange/wmi" 14 ) 15 16 var regexesProcesses = []*regexp.Regexp{} 17 18 func AddProcessConfig(params conf.ProcessParams) error { 19 if params.Name == "" { 20 return fmt.Errorf("empty process Name") 21 } 22 reg, err := regexp.Compile(params.Name) 23 if err != nil { 24 return err 25 } 26 regexesProcesses = append(regexesProcesses, reg) 27 return nil 28 } 29 30 func WatchProcesses() { 31 if len(regexesProcesses) == 0 { 32 // if no process settings configured in config file, use this set instead. 33 regexesProcesses = append(regexesProcesses, regexp.MustCompile("chrome|powershell|scollector|WinRM|MSSQLSERVER")) 34 } 35 collectors = append(collectors, &IntervalCollector{ 36 F: c_windows_processes, 37 }) 38 } 39 40 func c_windows_processes() (opentsdb.MultiDataPoint, error) { 41 var dst []Win32_PerfRawData_PerfProc_Process 42 var q = wmi.CreateQuery(&dst, `WHERE Name <> '_Total'`) 43 err := queryWmi(q, &dst) 44 if err != nil { 45 return nil, err 46 } 47 48 var svc_dst []Win32_Service 49 var svc_q = wmi.CreateQuery(&svc_dst, "") 50 err = queryWmi(svc_q, &svc_dst) 51 if err != nil { 52 return nil, err 53 } 54 55 var iis_dst []WorkerProcess 56 iis_q := wmi.CreateQuery(&iis_dst, "") 57 err = queryWmiNamespace(iis_q, &iis_dst, "root\\WebAdministration") 58 if err != nil { 59 // Don't return from this error since the name space might exist. 
60 iis_dst = nil 61 } 62 63 var numberOfLogicalProcessors uint64 64 var core_dst []Win32_ComputerSystem 65 var core_q = wmi.CreateQuery(&core_dst, "") 66 err = queryWmi(core_q, &core_dst) 67 if err != nil { 68 return nil, err 69 } 70 for _, y := range core_dst { 71 numberOfLogicalProcessors = uint64(y.NumberOfLogicalProcessors) 72 } 73 if numberOfLogicalProcessors == 0 { 74 return nil, fmt.Errorf("invalid result: numberOfLogicalProcessors=%v", numberOfLogicalProcessors) 75 } 76 77 var md opentsdb.MultiDataPoint 78 var svc_dst_started []Win32_Service 79 for _, svc := range svc_dst { 80 if util.NameMatches(svc.Name, regexesProcesses) { 81 if svc.Started { 82 svc_dst_started = append(svc_dst_started, svc) 83 } 84 tags := opentsdb.TagSet{"name": svc.Name} 85 Add(&md, "win.service.started", util.Btoi(svc.Started), tags, metadata.Gauge, metadata.Bool, descWinServiceStarted) 86 Add(&md, "win.service.status", util.Btoi(svc.Status != "OK"), tags, metadata.Gauge, metadata.Ok, descWinServiceStatus) 87 Add(&md, "win.service.checkpoint", svc.CheckPoint, tags, metadata.Gauge, metadata.None, descWinServiceCheckPoint) 88 Add(&md, "win.service.wait_hint", svc.WaitHint, tags, metadata.Gauge, metadata.MilliSecond, descWinServiceWaitHint) 89 Add(&md, osServiceRunning, util.Btoi(svc.Started), tags, metadata.Gauge, metadata.Bool, osServiceRunningDesc) 90 } 91 } 92 93 totalCPUByName := make(map[string]uint64) 94 totalVirtualMemByName := make(map[string]uint64) 95 totalPrivateWSMemByName := make(map[string]uint64) 96 countByName := make(map[string]int) 97 98 for _, v := range dst { 99 var name string 100 service_match := false 101 iis_match := false 102 103 process_match := util.NameMatches(v.Name, regexesProcesses) 104 105 id := "0" 106 107 if process_match { 108 raw_name := strings.Split(v.Name, "#") 109 name = raw_name[0] 110 if len(raw_name) == 2 { 111 id = raw_name[1] 112 } 113 // If you have a hash sign in your process name you don't deserve monitoring ;-) 114 if len(raw_name) > 2 { 
115 continue 116 } 117 } 118 119 // A Service match could "overwrite" a process match, but that is probably what we would want 120 for _, svc := range svc_dst_started { 121 // It is possible the pid has gone and been reused, but I think this unlikely 122 // And I'm not aware of an atomic join we could do anyways 123 if svc.ProcessId != 0 && svc.ProcessId == v.IDProcess { 124 id = "0" 125 service_match = true 126 name = svc.Name 127 break 128 } 129 } 130 131 for _, a_pool := range iis_dst { 132 if a_pool.ProcessId == v.IDProcess { 133 id = "0" 134 iis_match = true 135 name = strings.Join([]string{"iis", a_pool.AppPoolName}, "_") 136 break 137 } 138 } 139 140 if v.IDProcess == uint32(os.Getpid()) { 141 TotalScollectorMemoryMB = v.WorkingSetPrivate / 1024 / 1024 142 } 143 144 if !(service_match || process_match || iis_match) { 145 continue 146 } 147 148 //Use timestamp from WMI to fix issues with CPU metrics 149 ts := TSys100NStoEpoch(v.Timestamp_Sys100NS) 150 tags := opentsdb.TagSet{"name": name, "id": id} 151 AddTS(&md, "win.proc.cpu", ts, v.PercentPrivilegedTime/NS100_Seconds/numberOfLogicalProcessors, opentsdb.TagSet{"type": "privileged"}.Merge(tags), metadata.Counter, metadata.Pct, descWinProcCPU_priv) 152 AddTS(&md, "win.proc.cpu", ts, v.PercentUserTime/NS100_Seconds/numberOfLogicalProcessors, opentsdb.TagSet{"type": "user"}.Merge(tags), metadata.Counter, metadata.Pct, descWinProcCPU_user) 153 totalCPUByName[name] += v.PercentUserTime / NS100_Seconds / numberOfLogicalProcessors 154 AddTS(&md, "win.proc.cpu_total", ts, v.PercentProcessorTime/NS100_Seconds/numberOfLogicalProcessors, tags, metadata.Counter, metadata.Pct, descWinProcCPU_total) 155 if v.Frequency_Object != 0 { 156 Add(&md, "win.proc.elapsed_time", (v.Timestamp_Object-v.ElapsedTime)/v.Frequency_Object, tags, metadata.Gauge, metadata.Second, descWinProcElapsed_time) 157 } 158 Add(&md, "win.proc.handle_count", v.HandleCount, tags, metadata.Gauge, metadata.Count, descWinProcHandle_count) 159 Add(&md, 
"win.proc.io_bytes", v.IOOtherBytesPersec, opentsdb.TagSet{"type": "other"}.Merge(tags), metadata.Counter, metadata.BytesPerSecond, descWinProcIo_bytes_other) 160 Add(&md, "win.proc.io_bytes", v.IOReadBytesPersec, opentsdb.TagSet{"type": "read"}.Merge(tags), metadata.Counter, metadata.BytesPerSecond, descWinProcIo_bytes_read) 161 Add(&md, "win.proc.io_bytes", v.IOWriteBytesPersec, opentsdb.TagSet{"type": "write"}.Merge(tags), metadata.Counter, metadata.BytesPerSecond, descWinProcIo_bytes_write) 162 Add(&md, "win.proc.io_operations", v.IOOtherOperationsPersec, opentsdb.TagSet{"type": "other"}.Merge(tags), metadata.Counter, metadata.Operation, descWinProcIo_operations) 163 Add(&md, "win.proc.io_operations", v.IOReadOperationsPersec, opentsdb.TagSet{"type": "read"}.Merge(tags), metadata.Counter, metadata.Operation, descWinProcIo_operations_read) 164 Add(&md, "win.proc.io_operations", v.IOWriteOperationsPersec, opentsdb.TagSet{"type": "write"}.Merge(tags), metadata.Counter, metadata.Operation, descWinProcIo_operations_write) 165 Add(&md, "win.proc.mem.page_faults", v.PageFaultsPersec, tags, metadata.Counter, metadata.PerSecond, descWinProcMemPage_faults) 166 Add(&md, "win.proc.mem.pagefile_bytes", v.PageFileBytes, tags, metadata.Gauge, metadata.Bytes, descWinProcMemPagefile_bytes) 167 Add(&md, "win.proc.mem.pagefile_bytes_peak", v.PageFileBytesPeak, tags, metadata.Gauge, metadata.Bytes, descWinProcMemPagefile_bytes_peak) 168 Add(&md, "win.proc.mem.pool_nonpaged_bytes", v.PoolNonpagedBytes, tags, metadata.Gauge, metadata.Bytes, descWinProcMemPool_nonpaged_bytes) 169 Add(&md, "win.proc.mem.pool_paged_bytes", v.PoolPagedBytes, tags, metadata.Gauge, metadata.Bytes, descWinProcMemPool_paged_bytes) 170 Add(&md, "win.proc.mem.vm.bytes", v.VirtualBytes, tags, metadata.Gauge, metadata.Bytes, descWinProcMemVmBytes) 171 totalVirtualMemByName[name] += v.VirtualBytes 172 Add(&md, "win.proc.mem.vm.bytes_peak", v.VirtualBytesPeak, tags, metadata.Gauge, metadata.Bytes, 
descWinProcMemVmBytes_peak) 173 Add(&md, "win.proc.mem.working_set", v.WorkingSet, tags, metadata.Gauge, metadata.Bytes, descWinProcMemWorking_set) 174 Add(&md, "win.proc.mem.working_set_peak", v.WorkingSetPeak, tags, metadata.Gauge, metadata.Bytes, descWinProcMemWorking_set_peak) 175 Add(&md, "win.proc.mem.working_set_private", v.WorkingSetPrivate, tags, metadata.Gauge, metadata.Bytes, descWinProcMemWorking_set_private) 176 totalPrivateWSMemByName[name] += v.WorkingSetPrivate 177 Add(&md, "win.proc.priority_base", v.PriorityBase, tags, metadata.Gauge, metadata.None, descWinProcPriority_base) 178 Add(&md, "win.proc.private_bytes", v.PrivateBytes, tags, metadata.Gauge, metadata.Bytes, descWinProcPrivate_bytes) 179 Add(&md, "win.proc.thread_count", v.ThreadCount, tags, metadata.Gauge, metadata.Count, descWinProcthread_count) 180 Add(&md, "win.proc.pid", v.IDProcess, tags, metadata.Gauge, metadata.Unit("PID"), osProcPID) 181 countByName[name]++ 182 } 183 for name, count := range countByName { 184 if count < 1 { 185 continue 186 } 187 Add(&md, osProcCount, count, opentsdb.TagSet{"name": name}, metadata.Gauge, metadata.Process, osProcCountDesc) 188 if totalCPU, ok := totalCPUByName[name]; ok { 189 Add(&md, osProcCPU, totalCPU, opentsdb.TagSet{"name": name}, metadata.Counter, metadata.Pct, osProcCPUDesc) 190 } 191 if totalVM, ok := totalVirtualMemByName[name]; ok { 192 Add(&md, osProcMemVirtual, totalVM, opentsdb.TagSet{"name": name}, metadata.Gauge, metadata.Bytes, osProcMemVirtualDesc) 193 } 194 if totalPWS, ok := totalPrivateWSMemByName[name]; ok { 195 Add(&md, osProcMemReal, totalPWS, opentsdb.TagSet{"name": name}, metadata.Gauge, metadata.Bytes, osProcMemRealDesc) 196 } 197 } 198 return md, nil 199 } 200 201 // Divide CPU by 1e5 because: 1 seconds / 100 Nanoseconds = 1e7. This is the 202 // percent time as a decimal, so divide by two less zeros to make it the same as 203 // the result * 100. 
const NS100_Seconds = 1e5

// Metadata descriptions attached to the win.proc.* metrics.
const (
	descWinProcCPU_priv               = "Percentage of elapsed time that this process's threads have spent executing code in privileged mode."
	descWinProcCPU_total              = "Percentage of elapsed time that this process's threads have spent executing code in user or privileged mode."
	descWinProcCPU_user               = "Percentage of elapsed time that this process's threads have spent executing code in user mode."
	descWinProcElapsed_time           = "Elapsed time in seconds this process has been running."
	descWinProcHandle_count           = "Total number of handles the process has open across all threads."
	descWinProcIo_bytes_other         = "Rate at which the process is issuing bytes to I/O operations that do not involve data such as control operations."
	descWinProcIo_bytes_read          = "Rate at which the process is reading bytes from I/O operations."
	descWinProcIo_bytes_write         = "Rate at which the process is writing bytes to I/O operations."
	descWinProcIo_operations          = "Rate at which the process is issuing I/O operations that are neither a read or a write request."
	descWinProcIo_operations_read     = "Rate at which the process is issuing read I/O operations."
	descWinProcIo_operations_write    = "Rate at which the process is issuing write I/O operations."
	descWinProcMemPage_faults         = "Rate of page faults by the threads executing in this process."
	descWinProcMemPagefile_bytes      = "Current number of bytes this process has used in the paging file(s)."
	descWinProcMemPagefile_bytes_peak = "Maximum number of bytes this process has used in the paging file(s)."
	descWinProcMemPool_nonpaged_bytes = "Total number of bytes for objects that cannot be written to disk when they are not being used."
	descWinProcMemPool_paged_bytes    = "Total number of bytes for objects that can be written to disk when they are not being used."
	descWinProcMemVmBytes             = "Current size, in bytes, of the virtual address space that the process is using."
	descWinProcMemVmBytes_peak        = "Maximum number of bytes of virtual address space that the process has used at any one time."
	descWinProcMemWorking_set         = "Current number of bytes in the working set of this process at any point in time."
	descWinProcMemWorking_set_peak    = "Maximum number of bytes in the working set of this process at any point in time."
	descWinProcMemWorking_set_private = "Current number of bytes in the working set that are not shared with other processes."
	descWinProcPriority_base          = "Current base priority of this process. Threads within a process can raise and lower their own base priority relative to the process base priority of the process."
	descWinProcPrivate_bytes          = "Current number of bytes this process has allocated that cannot be shared with other processes."
	descWinProcthread_count           = "Number of threads currently active in this process."
)

// Win32_PerfRawData_PerfProc_Process is the destination row type for the
// per-process raw performance counter query. The type and field names
// presumably have to match the WMI class and property names for the wmi
// package to populate them — confirm before renaming anything.
//
// Actually a CIM_StatisticalInformation.
type Win32_PerfRawData_PerfProc_Process struct {
	ElapsedTime             uint64
	Frequency_Object        uint64
	HandleCount             uint32
	IDProcess               uint32
	IOOtherBytesPersec      uint64
	IOOtherOperationsPersec uint64
	IOReadBytesPersec       uint64
	IOReadOperationsPersec  uint64
	IOWriteBytesPersec      uint64
	IOWriteOperationsPersec uint64
	Name                    string
	PageFaultsPersec        uint32
	PageFileBytes           uint64
	PageFileBytesPeak       uint64
	PercentPrivilegedTime   uint64
	PercentProcessorTime    uint64
	PercentUserTime         uint64
	PoolNonpagedBytes       uint32
	PoolPagedBytes          uint32
	PriorityBase            uint32
	PrivateBytes            uint64
	ThreadCount             uint32
	Timestamp_Object        uint64
	Timestamp_Sys100NS      uint64
	VirtualBytes            uint64
	VirtualBytesPeak        uint64
	WorkingSet              uint64
	WorkingSetPeak          uint64
	WorkingSetPrivate       uint64
}

// Metadata descriptions attached to the win.service.* metrics.
const (
	descWinServiceCheckPoint = "The CheckPoint property specifies a value that the service increments periodically to report its progress during a lengthy start, stop, pause, or continue operation. For example, the service should increment this value as it completes each step of its initialization when it is starting up. The user interface program that invoked the operation on the service uses this value to track the progress of the service during a lengthy operation. This value is not valid and should be zero when the service does not have a start, stop, pause, or continue operation pending."
	descWinServiceStarted    = "Started is a boolean indicating whether the service has been started (TRUE), or stopped (FALSE)."
	descWinServiceStatus     = "The Status property indicates the current status of the object. Right now 0=OK and 1=Not OK, but various operational and non-operational statuses can be defined such as OK, Degraded, Pred Fail, Error, Starting, Stopping, and Service."
	descWinServiceWaitHint   = "The WaitHint property specifies the estimated time required (in milliseconds) for a pending start, stop, pause, or continue operation. After the specified amount of time has elapsed, the service makes its next call to the SetServiceStatus function with either an incremented CheckPoint value or a change in Current State. If the amount of time specified by WaitHint passes, and CheckPoint has not been incremented, or the Current State has not changed, the service control manager or service control program assumes that an error has occurred."
)

// Win32_Service is the destination row type for the service query.
//
// Actually a Win32_BaseService.
type Win32_Service struct {
	CheckPoint uint32
	Name       string
	ProcessId  uint32
	Started    bool
	Status     string
	WaitHint   uint32
	StartMode  string
}

// WorkerProcess is the destination row type for the IIS worker process query;
// it ties a process ID to the name of an application pool.
type WorkerProcess struct {
	AppPoolName string
	ProcessId   uint32
}