github.com/jingruilea/kubeedge@v1.2.0-beta.0.0.20200410162146-4bb8902b3879/edge/pkg/edged/edged_status.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 16 @CHANGELOG 17 KubeEdge Authors: To create mini-kubelet for edge deployment scenario, 18 This file is derived from K8S Kubelet code with reduced set of methods 19 Changes done are 20 1. setNodeReadyCondition is partially come from "k8s.io/kubernetes/pkg/kubelet.setNodeReadyCondition" 21 */ 22 23 package edged 24 25 import ( 26 "fmt" 27 "io/ioutil" 28 "os" 29 "regexp" 30 "runtime" 31 "strconv" 32 "strings" 33 "time" 34 35 v1 "k8s.io/api/core/v1" 36 "k8s.io/apimachinery/pkg/api/resource" 37 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 38 "k8s.io/apimachinery/pkg/util/sets" 39 "k8s.io/klog" 40 41 beehiveContext "github.com/kubeedge/beehive/pkg/core/context" 42 "github.com/kubeedge/beehive/pkg/core/model" 43 edgeapi "github.com/kubeedge/kubeedge/common/types" 44 "github.com/kubeedge/kubeedge/edge/pkg/common/message" 45 "github.com/kubeedge/kubeedge/edge/pkg/common/modules" 46 "github.com/kubeedge/kubeedge/edge/pkg/edged/apis" 47 "github.com/kubeedge/kubeedge/edge/pkg/edged/config" 48 "github.com/kubeedge/kubeedge/edge/pkg/edgehub" 49 "github.com/kubeedge/kubeedge/pkg/util" 50 ) 51 52 //GPUInfoQueryTool sets information monitoring tool location for GPU 53 var GPUInfoQueryTool = "/var/IEF/nvidia/bin/nvidia-smi" 54 var initNode v1.Node 55 var reservationMemory = resource.MustParse(fmt.Sprintf("%dMi", 100)) 56 57 func (e *edged) initialNode() (*v1.Node, error) { 58 var node = &v1.Node{} 59 60 if runtime.GOOS == "windows" { 61 return node, nil 62 } 63 64 nodeInfo, err := e.getNodeInfo() 65 if err != nil { 66 return nil, err 67 } 68 node.Status.NodeInfo = nodeInfo 69 70 hostname, err := os.Hostname() 71 if err != nil { 72 klog.Errorf("couldn't determine hostname: %v", err) 73 } 74 75 ip, err := e.getIP() 76 if err != nil { 77 return nil, err 78 } 79 node.Status.Addresses = []v1.NodeAddress{ 80 {Type: v1.NodeInternalIP, Address: ip}, 81 {Type: v1.NodeHostName, Address: hostname}, 82 } 83 84 node.Status.Capacity = make(v1.ResourceList) 85 node.Status.Allocatable = make(v1.ResourceList) 86 err = e.setMemInfo(node.Status.Capacity, node.Status.Allocatable) 87 if err != nil { 88 return nil, err 89 } 90 91 err = e.setCPUInfo(node.Status.Capacity, node.Status.Allocatable) 92 if err != nil { 93 return nil, err 94 } 95 96 node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI) 97 node.Status.Allocatable[v1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI) 98 99 return node, nil 100 } 101 102 func (e *edged) setInitNode(node *v1.Node) { 103 initNode.Status = *node.Status.DeepCopy() 104 } 105 106 // Retrieve node status 107 func retrieveDevicePluginStatus(s string) (string, error) { 108 tagLen := len(apis.StatusTag) 109 if len(s) <= tagLen { 110 return "", fmt.Errorf("no node status wrapped in") 111 } 112 113 tag := s[:tagLen] 114 if string(tag) != apis.StatusTag { 115 return "", fmt.Errorf("not a node status json string") 116 } 117 statusList := s[tagLen:] 118 klog.Infof("retrieve piggybacked status: %v", statusList) 119 return statusList, nil 120 } 121 122 func (e *edged) getNodeStatusRequest(node *v1.Node) (*edgeapi.NodeStatusRequest, error) { 123 var nodeStatus = &edgeapi.NodeStatusRequest{} 124 nodeStatus.UID = e.uid 125 nodeStatus.Status = *node.Status.DeepCopy() 126 nodeStatus.Status.Phase = e.getNodePhase() 127 128 devicePluginCapacity, _, removedDevicePlugins := e.getDevicePluginResourceCapacity() 129 if devicePluginCapacity != nil { 130 for k, v := range devicePluginCapacity { 131 klog.Infof("Update capacity for %s to %d", k, v.Value()) 132 nodeStatus.Status.Capacity[k] = v 133 nodeStatus.Status.Allocatable[k] = v 134 } 135 } 136 137 nameSet := sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory), string(v1.ResourceStorage), 138 string(v1.ResourceEphemeralStorage), string(apis.NvidiaGPUScalarResourceName)) 139 140 for _, removedResource := range removedDevicePlugins { 141 // if the remmovedReousrce is not contained in the nameSet and contains specific tag 142 if !nameSet.Has(removedResource) { 143 status, err := retrieveDevicePluginStatus(removedResource) 144 if err == nil { 145 if node.Annotations == nil { 146 node.Annotations = make(map[string]string) 147 } 148 node.Annotations[apis.NvidiaGPUStatusAnnotationKey] = status 149 klog.Infof("Setting node annotation to add node status list to Scheduler") 150 continue 151 } 152 } 153 klog.Infof("Remove capacity for %s", removedResource) 154 delete(node.Status.Capacity, v1.ResourceName(removedResource)) 155 } 156 e.setNodeStatusDaemonEndpoints(nodeStatus) 157 e.setNodeStatusConditions(nodeStatus) 158 if e.gpuPluginEnabled { 159 err := e.setGPUInfo(nodeStatus) 160 if err != nil { 161 klog.Errorf("setGPUInfo failed, err: %v", err) 162 } 163 } 164 if e.volumeManager.ReconcilerStatesHasBeenSynced() { 165 node.Status.VolumesInUse = e.volumeManager.GetVolumesInUse() 166 } else { 167 node.Status.VolumesInUse = nil 168 } 169 e.volumeManager.MarkVolumesAsReportedInUse(node.Status.VolumesInUse) 170 klog.Infof("Sync VolumesInUse: %v", node.Status.VolumesInUse) 171 172 return nodeStatus, nil 173 } 174 175 func (e *edged) setNodeStatusDaemonEndpoints(node *edgeapi.NodeStatusRequest) { 176 node.Status.DaemonEndpoints = v1.NodeDaemonEndpoints{ 177 KubeletEndpoint: v1.DaemonEndpoint{ 178 Port: config.KubeletPort, 179 }, 180 } 181 } 182 183 func (e *edged) setNodeStatusConditions(node *edgeapi.NodeStatusRequest) { 184 e.setNodeReadyCondition(node) 185 } 186 187 // setNodeReadyCondition is partially come from "k8s.io/kubernetes/pkg/kubelet.setNodeReadyCondition" 188 func (e *edged) setNodeReadyCondition(node *edgeapi.NodeStatusRequest) { 189 currentTime := metav1.NewTime(time.Now()) 190 var newNodeReadyCondition v1.NodeCondition 191 192 var err error 193 _, err = e.containerRuntime.Version() 194 195 if err != nil { 196 newNodeReadyCondition = v1.NodeCondition{ 197 Type: v1.NodeReady, 198 Status: v1.ConditionFalse, 199 Reason: "EdgeNotReady", 200 Message: err.Error(), 201 LastHeartbeatTime: currentTime, 202 } 203 } else { 204 newNodeReadyCondition = v1.NodeCondition{ 205 Type: v1.NodeReady, 206 Status: v1.ConditionTrue, 207 Reason: "EdgeReady", 208 Message: "edge is posting ready status", 209 LastHeartbeatTime: currentTime, 210 } 211 } 212 213 readyConditionUpdated := false 214 for i := range node.Status.Conditions { 215 if node.Status.Conditions[i].Type == v1.NodeReady { 216 if node.Status.Conditions[i].Status == newNodeReadyCondition.Status { 217 newNodeReadyCondition.LastTransitionTime = node.Status.Conditions[i].LastTransitionTime 218 } else { 219 newNodeReadyCondition.LastTransitionTime = currentTime 220 } 221 node.Status.Conditions[i] = newNodeReadyCondition 222 readyConditionUpdated = true 223 break 224 } 225 } 226 if !readyConditionUpdated { 227 newNodeReadyCondition.LastTransitionTime = currentTime 228 node.Status.Conditions = append(node.Status.Conditions, newNodeReadyCondition) 229 } 230 231 } 232 233 func (e *edged) getNodeInfo() (v1.NodeSystemInfo, error) { 234 nodeInfo := v1.NodeSystemInfo{} 235 kernel, err := util.Command("uname", []string{"-r"}) 236 if err != nil { 237 return nodeInfo, err 238 } 239 240 prettyName, err := util.Command("sh", []string{"-c", `cat /etc/os-release | grep PRETTY_NAME| awk -F '"' '{print$2}'`}) 241 if err != nil { 242 return nodeInfo, err 243 } 244 245 runtimeVersion, err := e.containerRuntime.Version() 246 if err != nil { 247 return nodeInfo, err 248 } 249 nodeInfo.ContainerRuntimeVersion = fmt.Sprintf("remote://%s", runtimeVersion.String()) 250 251 nodeInfo.KernelVersion = kernel 252 nodeInfo.OperatingSystem = runtime.GOOS 253 nodeInfo.Architecture = runtime.GOARCH 254 nodeInfo.KubeletVersion = e.version 255 nodeInfo.OSImage = prettyName 256 //nodeInfo.ContainerRuntimeVersion = fmt.Sprintf("docker://%s", runtimeVersion.String()) 257 258 return nodeInfo, nil 259 260 } 261 262 func (e *edged) setGPUInfo(nodeStatus *edgeapi.NodeStatusRequest) error { 263 _, err := os.Stat(GPUInfoQueryTool) 264 if err != nil { 265 return fmt.Errorf("can not get file in path: %s, err: %v", GPUInfoQueryTool, err) 266 } 267 268 nodeStatus.ExtendResources = make(map[v1.ResourceName][]edgeapi.ExtendResource) 269 270 result, err := util.Command("sh", []string{"-c", fmt.Sprintf("%s -L", GPUInfoQueryTool)}) 271 if err != nil { 272 return err 273 } 274 re := regexp.MustCompile(`GPU .*:.*\(.*\)`) 275 gpuInfos := re.FindAllString(result, -1) 276 gpuResources := make([]edgeapi.ExtendResource, 0) 277 gpuRegexp := regexp.MustCompile(`^GPU ([\d]+):(.*)\(.*\)`) 278 for _, gpuInfo := range gpuInfos { 279 params := gpuRegexp.FindStringSubmatch(strings.TrimSpace(gpuInfo)) 280 if len(params) != 3 { 281 klog.Errorf("parse gpu failed, gpuInfo: %v, params: %v", gpuInfo, params) 282 continue 283 } 284 gpuName := params[1] 285 gpuType := params[2] 286 result, err = util.Command("sh", []string{"-c", fmt.Sprintf("%s -i %s -a|grep -A 3 \"FB Memory Usage\"| grep Total", GPUInfoQueryTool, gpuName)}) 287 if err != nil { 288 klog.Errorf("get gpu(%v) memory failed, err: %v", gpuName, err) 289 continue 290 } 291 parts := strings.Split(result, ":") 292 if len(parts) != 2 { 293 klog.Errorf("parse gpu(%v) memory failed, parts: %v", gpuName, parts) 294 continue 295 } 296 mem := strings.TrimSpace(strings.Split(strings.TrimSpace(parts[1]), " ")[0]) 297 298 gpuResource := edgeapi.ExtendResource{} 299 gpuResource.Name = fmt.Sprintf("nvidia%v", gpuName) 300 gpuResource.Type = gpuType 301 gpuResource.Capacity = resource.MustParse(mem + "Mi") 302 gpuResources = append(gpuResources, gpuResource) 303 } 304 305 nodeStatus.ExtendResources[apis.NvidiaGPUResource] = gpuResources 306 return nil 307 } 308 309 func (e *edged) getIP() (string, error) { 310 if nodeIP := config.Config.NodeIP; nodeIP != "" { 311 return nodeIP, nil 312 } 313 hostName, _ := os.Hostname() 314 if hostName == "" { 315 hostName = e.nodeName 316 } 317 return util.GetLocalIP(hostName) 318 } 319 320 func (e *edged) setMemInfo(total, allocated v1.ResourceList) error { 321 out, err := ioutil.ReadFile("/proc/meminfo") 322 if err != nil { 323 return err 324 } 325 matches := regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`).FindSubmatch(out) 326 if len(matches) != 2 { 327 return fmt.Errorf("failed to match regexp in output: %q", string(out)) 328 } 329 m, err := strconv.ParseInt(string(matches[1]), 10, 64) 330 if err != nil { 331 return err 332 } 333 totalMem := m / 1024 334 mem := resource.MustParse(strconv.FormatInt(totalMem, 10) + "Mi") 335 total[v1.ResourceMemory] = mem.DeepCopy() 336 337 if mem.Cmp(reservationMemory) > 0 { 338 mem.Sub(reservationMemory) 339 } 340 allocated[v1.ResourceMemory] = mem.DeepCopy() 341 342 return nil 343 } 344 345 func (e *edged) setCPUInfo(total, allocated v1.ResourceList) error { 346 total[v1.ResourceCPU] = resource.MustParse(fmt.Sprintf("%d", runtime.NumCPU())) 347 allocated[v1.ResourceCPU] = total[v1.ResourceCPU].DeepCopy() 348 349 return nil 350 } 351 352 func (e *edged) getDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string) { 353 return e.containerManager.GetDevicePluginResourceCapacity() 354 } 355 356 func (e *edged) getNodePhase() v1.NodePhase { 357 return v1.NodeRunning 358 } 359 360 func (e *edged) registerNode() error { 361 node, err := e.initialNode() 362 if err != nil { 363 klog.Errorf("Unable to construct v1.Node object for edge: %v", err) 364 return err 365 } 366 367 e.setInitNode(node) 368 369 if config.Config.RegisterNode == false { 370 //when register-node set to false, do not auto register node 371 klog.Infof("register-node is set to false") 372 e.registrationCompleted = true 373 return nil 374 } 375 376 klog.Infof("Attempting to register node %s", e.nodeName) 377 378 resource := fmt.Sprintf("%s/%s/%s", e.namespace, model.ResourceTypeNodeStatus, e.nodeName) 379 nodeInfoMsg := message.BuildMsg(modules.MetaGroup, "", modules.EdgedModuleName, resource, model.InsertOperation, node) 380 res, err := beehiveContext.SendSync(edgehub.ModuleNameEdgeHub, *nodeInfoMsg, syncMsgRespTimeout) 381 if err != nil || res.Content != "OK" { 382 klog.Errorf("register node failed, error: %v", err) 383 return err 384 } 385 386 klog.Infof("Successfully registered node %s", e.nodeName) 387 e.registrationCompleted = true 388 389 return nil 390 } 391 392 func (e *edged) updateNodeStatus() error { 393 nodeStatus, err := e.getNodeStatusRequest(&initNode) 394 if err != nil { 395 klog.Errorf("Unable to construct api.NodeStatusRequest object for edge: %v", err) 396 return err 397 } 398 399 err = e.metaClient.NodeStatus(e.namespace).Update(e.nodeName, *nodeStatus) 400 if err != nil { 401 klog.Errorf("update node failed, error: %v", err) 402 } 403 return nil 404 } 405 406 func (e *edged) syncNodeStatus() { 407 if !e.registrationCompleted { 408 if err := e.registerNode(); err != nil { 409 klog.Errorf("Register node failed: %v", err) 410 } 411 } else { 412 if err := e.updateNodeStatus(); err != nil { 413 klog.Errorf("Unable to update node status: %v", err) 414 } 415 } 416 }