github.com/netdata/go.d.plugin@v0.58.1/modules/hdfs/metrics.go (about) 1 // SPDX-License-Identifier: GPL-3.0-or-later 2 3 package hdfs 4 5 // HDFS Architecture 6 // https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html#NameNode+and+DataNodes 7 8 // Metrics description 9 // https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Metrics.html 10 11 // Good article 12 // https://www.datadoghq.com/blog/monitor-hadoop-metrics/#hdfs-metrics 13 14 type metrics struct { 15 Jvm *jvmMetrics `stm:"jvm"` // both 16 Rpc *rpcActivityMetrics `stm:"rpc"` // both 17 FSNameSystem *fsNameSystemMetrics `stm:"fsns"` // namenode 18 FSDatasetState *fsDatasetStateMetrics `stm:"fsds"` // datanode 19 DataNodeActivity *dataNodeActivityMetrics `stm:"dna"` // datanode 20 } 21 22 type jvmMetrics struct { 23 ProcessName string `json:"tag.ProcessName"` 24 HostName string `json:"tag.Hostname"` 25 //MemNonHeapUsedM float64 `stm:"mem_non_heap_used,1000,1"` 26 //MemNonHeapCommittedM float64 `stm:"mem_non_heap_committed,1000,1"` 27 //MemNonHeapMaxM float64 `stm:"mem_non_heap_max"` 28 MemHeapUsedM float64 `stm:"mem_heap_used,1000,1"` 29 MemHeapCommittedM float64 `stm:"mem_heap_committed,1000,1"` 30 MemHeapMaxM float64 `stm:"mem_heap_max"` 31 //MemMaxM float64 `stm:"mem_max"` 32 GcCount float64 `stm:"gc_count"` 33 GcTimeMillis float64 `stm:"gc_time_millis"` 34 GcNumWarnThresholdExceeded float64 `stm:"gc_num_warn_threshold_exceeded"` 35 GcNumInfoThresholdExceeded float64 `stm:"gc_num_info_threshold_exceeded"` 36 GcTotalExtraSleepTime float64 `stm:"gc_total_extra_sleep_time"` 37 ThreadsNew float64 `stm:"threads_new"` 38 ThreadsRunnable float64 `stm:"threads_runnable"` 39 ThreadsBlocked float64 `stm:"threads_blocked"` 40 ThreadsWaiting float64 `stm:"threads_waiting"` 41 ThreadsTimedWaiting float64 `stm:"threads_timed_waiting"` 42 ThreadsTerminated float64 `stm:"threads_terminated"` 43 LogFatal float64 `stm:"log_fatal"` 44 LogError float64 `stm:"log_error"` 45 LogWarn float64 `stm:"log_warn"` 46 LogInfo float64 `stm:"log_info"` 47 } 48 49 type rpcActivityMetrics struct { 50 ReceivedBytes float64 `stm:"received_bytes"` 51 SentBytes float64 `stm:"sent_bytes"` 52 RpcQueueTimeNumOps float64 `stm:"queue_time_num_ops"` 53 RpcQueueTimeAvgTime float64 `stm:"queue_time_avg_time,1000,1"` 54 //RpcProcessingTimeNumOps float64 55 RpcProcessingTimeAvgTime float64 `stm:"processing_time_avg_time,1000,1"` 56 //DeferredRpcProcessingTimeNumOps float64 57 //DeferredRpcProcessingTimeAvgTime float64 58 //RpcAuthenticationFailures float64 59 //RpcAuthenticationSuccesses float64 60 //RpcAuthorizationFailures float64 61 //RpcAuthorizationSuccesses float64 62 //RpcClientBackoff float64 63 //RpcSlowCalls float64 64 NumOpenConnections float64 `stm:"num_open_connections"` 65 CallQueueLength float64 `stm:"call_queue_length"` 66 //NumDroppedConnections float64 67 } 68 69 type fsNameSystemMetrics struct { 70 HostName string `json:"tag.Hostname"` 71 HAState string `json:"tag.HAState"` 72 //TotalSyncTimes float64 `json:"tag.tag.TotalSyncTimes"` 73 MissingBlocks float64 `stm:"missing_blocks"` 74 //MissingReplOneBlocks float64 `stm:"missing_repl_one_blocks"` 75 //ExpiredHeartbeats float64 `stm:"expired_heartbeats"` 76 //TransactionsSinceLastCheckpoint float64 `stm:"transactions_since_last_checkpoint"` 77 //TransactionsSinceLastLogRoll float64 `stm:"transactions_since_last_log_roll"` 78 //LastWrittenTransactionId float64 `stm:"last_written_transaction_id"` 79 //LastCheckpointTime float64 `stm:"last_checkpoint_time"` 80 CapacityTotal float64 `stm:"capacity_total"` 81 //CapacityTotalGB float64 `stm:"capacity_total_gb"` 82 CapacityDfsUsed float64 `json:"CapacityUsed" stm:"capacity_used_dfs"` 83 //CapacityUsedGB float64 `stm:"capacity_used_gb"` 84 CapacityRemaining float64 `stm:"capacity_remaining"` 85 //ProvidedCapacityTotal float64 `stm:"provided_capacity_total"` 86 //CapacityRemainingGB float64 `stm:"capacity_remaining_gb"` 87 CapacityUsedNonDFS float64 `stm:"capacity_used_non_dfs"` 88 TotalLoad float64 `stm:"total_load"` 89 //SnapshottableDirectories float64 `stm:"snapshottable_directories"` 90 //Snapshots float64 `stm:"snapshots"` 91 //NumEncryptionZones float64 `stm:"num_encryption_zones"` 92 //LockQueueLength float64 `stm:"lock_queue_length"` 93 BlocksTotal float64 `stm:"blocks_total"` 94 //NumFilesUnderConstruction float64 `stm:"num_files_under_construction"` 95 //NumActiveClients float64 `stm:"num_active_clients"` 96 FilesTotal float64 `stm:"files_total"` 97 //PendingReplicationBlocks float64 `stm:"pending_replication_blocks"` 98 //PendingReconstructionBlocks float64 `stm:"pending_reconstruction_blocks"` 99 UnderReplicatedBlocks float64 `stm:"under_replicated_blocks"` 100 //LowRedundancyBlocks float64 `stm:"low_redundancy_blocks"` 101 CorruptBlocks float64 `stm:"corrupt_blocks"` 102 //ScheduledReplicationBlocks float64 `stm:"scheduled_replication_blocks"` 103 //PendingDeletionBlocks float64 `stm:"pending_deletion_blocks"` 104 //LowRedundancyReplicatedBlocks float64 `stm:"low_redundancy_replicated_blocks"` 105 //CorruptReplicatedBlocks float64 `stm:"corrupt_replicated_blocks"` 106 //MissingReplicatedBlocks float64 `stm:"missing_replicated_blocks"` 107 //MissingReplicationOneBlocks float64 `stm:"missing_replication_one_blocks"` 108 //HighestPriorityLowRedundancyReplicatedBlocks float64 `stm:"highest_priority_low_redundancy_replicated_blocks"` 109 //HighestPriorityLowRedundancyECBlocks float64 `stm:"highest_priority_low_redundancy_ec_blocks"` 110 //BytesInFutureReplicatedBlocks float64 `stm:"bytes_in_future_replicated_blocks"` 111 //PendingDeletionReplicatedBlocks float64 `stm:"pending_deletion_replicated_blocks"` 112 //TotalReplicatedBlocks float64 `stm:"total_replicated_blocks"` 113 //LowRedundancyECBlockGroups float64 `stm:"low_redundancy_ec_block_groups"` 114 //CorruptECBlockGroups float64 `stm:"corrupt_ec_block_groups"` 115 //MissingECBlockGroups float64 `stm:"missing_ec_block_groups"` 116 //BytesInFutureECBlockGroups float64 `stm:"bytes_in_future_ec_block_groups"` 117 //PendingDeletionECBlocks float64 `stm:"pending_deletion_ec_blocks"` 118 //TotalECBlockGroups float64 `stm:"total_ec_block_groups"` 119 //ExcessBlocks float64 `stm:"excess_blocks"` 120 //NumTimedOutPendingReconstructions float64 `stm:"num_timed_out_pending_reconstructions"` 121 //PostponedMisreplicatedBlocks float64 `stm:"postponed_misreplicated_blocks"` 122 //PendingDataNodeMessageCount float64 `stm:"pending_data_node_message_count"` 123 //MillisSinceLastLoadedEdits float64 `stm:"millis_since_last_loaded_edits"` 124 //BlockCapacity float64 `stm:"block_capacity"` 125 NumLiveDataNodes float64 `stm:"num_live_data_nodes"` 126 NumDeadDataNodes float64 `stm:"num_dead_data_nodes"` 127 //NumDecomLiveDataNodes float64 `stm:"num_decom_live_data_nodes"` 128 //NumDecomDeadDataNodes float64 `stm:"num_decom_dead_data_nodes"` 129 VolumeFailuresTotal float64 `stm:"volume_failures_total"` 130 //EstimatedCapacityLostTotal float64 `stm:"estimated_capacity_lost_total"` 131 //NumDecommissioningDataNodes float64 `stm:"num_decommissioning_data_nodes"` 132 StaleDataNodes float64 `stm:"stale_data_nodes"` 133 //NumStaleStorages float64 `stm:"num_stale_storages"` 134 //TotalSyncCount float64 `stm:"total_sync_count"` 135 //NumInMaintenanceLiveDataNodes float64 `stm:"num_in_maintenance_live_data_nodes"` 136 //NumInMaintenanceDeadDataNodes float64 `stm:"num_in_maintenance_dead_data_nodes"` 137 //NumEnteringMaintenanceDataNodes float64 `stm:"num_entering_maintenance_data_nodes"` 138 139 // custom attributes 140 CapacityUsed float64 `json:"-" stm:"capacity_used"` 141 } 142 143 type fsDatasetStateMetrics struct { 144 HostName string `json:"tag.Hostname"` 145 Capacity float64 `stm:"capacity_total"` 146 DfsUsed float64 `stm:"capacity_used_dfs"` 147 Remaining float64 `stm:"capacity_remaining"` 148 NumFailedVolumes float64 `stm:"num_failed_volumes"` 149 //LastVolumeFailureDate float64 `stm:"LastVolumeFailureDate"` 150 //EstimatedCapacityLostTotal float64 `stm:"EstimatedCapacityLostTotal"` 151 //CacheUsed float64 `stm:"CacheUsed"` 152 //CacheCapacity float64 `stm:"CacheCapacity"` 153 //NumBlocksCached float64 `stm:"NumBlocksCached"` 154 //NumBlocksFailedToCache float64 `stm:"NumBlocksFailedToCache"` 155 //NumBlocksFailedToUnCache float64 `stm:"NumBlocksFailedToUnCache"` 156 157 // custom attributes 158 CapacityUsedNonDFS float64 `stm:"capacity_used_non_dfs"` 159 CapacityUsed float64 `stm:"capacity_used"` 160 } 161 162 type dataNodeActivityMetrics struct { 163 HostName string `json:"tag.Hostname"` 164 BytesWritten float64 `stm:"bytes_written"` 165 //TotalWriteTime float64 166 BytesRead float64 `stm:"bytes_read"` 167 //TotalReadTime float64 168 //BlocksWritten float64 169 //BlocksRead float64 170 //BlocksReplicated float64 171 //BlocksRemoved float64 172 //BlocksVerified float64 173 //BlockVerificationFailures float64 174 //BlocksCached float64 175 //BlocksUncached float64 176 //ReadsFromLocalClient float64 177 //ReadsFromRemoteClient float64 178 //WritesFromLocalClient float64 179 //WritesFromRemoteClient float64 180 //BlocksGetLocalPathInfo float64 181 //RemoteBytesRead float64 182 //RemoteBytesWritten float64 183 //RamDiskBlocksWrite float64 184 //RamDiskBlocksWriteFallback float64 185 //RamDiskBytesWrite float64 186 //RamDiskBlocksReadHits float64 187 //RamDiskBlocksEvicted float64 188 //RamDiskBlocksEvictedWithoutRead float64 189 //RamDiskBlocksEvictionWindowMsNumOps float64 190 //RamDiskBlocksEvictionWindowMsAvgTime float64 191 //RamDiskBlocksLazyPersisted float64 192 //RamDiskBlocksDeletedBeforeLazyPersisted float64 193 //RamDiskBytesLazyPersisted float64 194 //RamDiskBlocksLazyPersistWindowMsNumOps float64 195 //RamDiskBlocksLazyPersistWindowMsAvgTime float64 196 //FsyncCount float64 197 //VolumeFailures float64 198 //DatanodeNetworkErrors float64 199 //DataNodeActiveXceiversCount float64 200 //ReadBlockOpNumOps float64 201 //ReadBlockOpAvgTime float64 202 //WriteBlockOpNumOps float64 203 //WriteBlockOpAvgTime float64 204 //BlockChecksumOpNumOps float64 205 //BlockChecksumOpAvgTime float64 206 //CopyBlockOpNumOps float64 207 //CopyBlockOpAvgTime float64 208 //ReplaceBlockOpNumOps float64 209 //ReplaceBlockOpAvgTime float64 210 //HeartbeatsNumOps float64 211 //HeartbeatsAvgTime float64 212 //HeartbeatsTotalNumOps float64 213 //HeartbeatsTotalAvgTime float64 214 //LifelinesNumOps float64 215 //LifelinesAvgTime float64 216 //BlockReportsNumOps float64 217 //BlockReportsAvgTime float64 218 //IncrementalBlockReportsNumOps float64 219 //IncrementalBlockReportsAvgTime float64 220 //CacheReportsNumOps float64 221 //CacheReportsAvgTime float64 222 //PacketAckRoundTripTimeNanosNumOps float64 223 //PacketAckRoundTripTimeNanosAvgTime float64 224 //FlushNanosNumOps float64 225 //FlushNanosAvgTime float64 226 //FsyncNanosNumOps float64 227 //FsyncNanosAvgTime float64 228 //SendDataPacketBlockedOnNetworkNanosNumOps float64 229 //SendDataPacketBlockedOnNetworkNanosAvgTime float64 230 //SendDataPacketTransferNanosNumOps float64 231 //SendDataPacketTransferNanosAvgTime float64 232 //BlocksInPendingIBR float64 233 //BlocksReceivingInPendingIBR float64 234 //BlocksReceivedInPendingIBR float64 235 //BlocksDeletedInPendingIBR float64 236 //EcReconstructionTasks float64 237 //EcFailedReconstructionTasks float64 238 //EcDecodingTimeNanos float64 239 //EcReconstructionBytesRead float64 240 //EcReconstructionBytesWritten float64 241 //EcReconstructionRemoteBytesRead float64 242 //EcReconstructionReadTimeMillis float64 243 //EcReconstructionDecodingTimeMillis float64 244 //EcReconstructionWriteTimeMillis float64 245 }