github.com/jingruilea/kubeedge@v1.2.0-beta.0.0.20200410162146-4bb8902b3879/edge/pkg/edged/edged_status.go (about)

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  
    16  @CHANGELOG
    17  KubeEdge Authors: To create mini-kubelet for edge deployment scenario,
    18  This file is derived from K8S Kubelet code with reduced set of methods
    19  Changes done are
1. setNodeReadyCondition is partially derived from "k8s.io/kubernetes/pkg/kubelet.setNodeReadyCondition"
    21  */
    22  
    23  package edged
    24  
    25  import (
    26  	"fmt"
    27  	"io/ioutil"
    28  	"os"
    29  	"regexp"
    30  	"runtime"
    31  	"strconv"
    32  	"strings"
    33  	"time"
    34  
    35  	v1 "k8s.io/api/core/v1"
    36  	"k8s.io/apimachinery/pkg/api/resource"
    37  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    38  	"k8s.io/apimachinery/pkg/util/sets"
    39  	"k8s.io/klog"
    40  
    41  	beehiveContext "github.com/kubeedge/beehive/pkg/core/context"
    42  	"github.com/kubeedge/beehive/pkg/core/model"
    43  	edgeapi "github.com/kubeedge/kubeedge/common/types"
    44  	"github.com/kubeedge/kubeedge/edge/pkg/common/message"
    45  	"github.com/kubeedge/kubeedge/edge/pkg/common/modules"
    46  	"github.com/kubeedge/kubeedge/edge/pkg/edged/apis"
    47  	"github.com/kubeedge/kubeedge/edge/pkg/edged/config"
    48  	"github.com/kubeedge/kubeedge/edge/pkg/edgehub"
    49  	"github.com/kubeedge/kubeedge/pkg/util"
    50  )
    51  
    52  //GPUInfoQueryTool sets information monitoring tool location for GPU
    53  var GPUInfoQueryTool = "/var/IEF/nvidia/bin/nvidia-smi"
    54  var initNode v1.Node
    55  var reservationMemory = resource.MustParse(fmt.Sprintf("%dMi", 100))
    56  
    57  func (e *edged) initialNode() (*v1.Node, error) {
    58  	var node = &v1.Node{}
    59  
    60  	if runtime.GOOS == "windows" {
    61  		return node, nil
    62  	}
    63  
    64  	nodeInfo, err := e.getNodeInfo()
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  	node.Status.NodeInfo = nodeInfo
    69  
    70  	hostname, err := os.Hostname()
    71  	if err != nil {
    72  		klog.Errorf("couldn't determine hostname: %v", err)
    73  	}
    74  
    75  	ip, err := e.getIP()
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  	node.Status.Addresses = []v1.NodeAddress{
    80  		{Type: v1.NodeInternalIP, Address: ip},
    81  		{Type: v1.NodeHostName, Address: hostname},
    82  	}
    83  
    84  	node.Status.Capacity = make(v1.ResourceList)
    85  	node.Status.Allocatable = make(v1.ResourceList)
    86  	err = e.setMemInfo(node.Status.Capacity, node.Status.Allocatable)
    87  	if err != nil {
    88  		return nil, err
    89  	}
    90  
    91  	err = e.setCPUInfo(node.Status.Capacity, node.Status.Allocatable)
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  
    96  	node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
    97  	node.Status.Allocatable[v1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
    98  
    99  	return node, nil
   100  }
   101  
   102  func (e *edged) setInitNode(node *v1.Node) {
   103  	initNode.Status = *node.Status.DeepCopy()
   104  }
   105  
   106  // Retrieve node status
   107  func retrieveDevicePluginStatus(s string) (string, error) {
   108  	tagLen := len(apis.StatusTag)
   109  	if len(s) <= tagLen {
   110  		return "", fmt.Errorf("no node status wrapped in")
   111  	}
   112  
   113  	tag := s[:tagLen]
   114  	if string(tag) != apis.StatusTag {
   115  		return "", fmt.Errorf("not a node status json string")
   116  	}
   117  	statusList := s[tagLen:]
   118  	klog.Infof("retrieve piggybacked status: %v", statusList)
   119  	return statusList, nil
   120  }
   121  
   122  func (e *edged) getNodeStatusRequest(node *v1.Node) (*edgeapi.NodeStatusRequest, error) {
   123  	var nodeStatus = &edgeapi.NodeStatusRequest{}
   124  	nodeStatus.UID = e.uid
   125  	nodeStatus.Status = *node.Status.DeepCopy()
   126  	nodeStatus.Status.Phase = e.getNodePhase()
   127  
   128  	devicePluginCapacity, _, removedDevicePlugins := e.getDevicePluginResourceCapacity()
   129  	if devicePluginCapacity != nil {
   130  		for k, v := range devicePluginCapacity {
   131  			klog.Infof("Update capacity for %s to %d", k, v.Value())
   132  			nodeStatus.Status.Capacity[k] = v
   133  			nodeStatus.Status.Allocatable[k] = v
   134  		}
   135  	}
   136  
   137  	nameSet := sets.NewString(string(v1.ResourceCPU), string(v1.ResourceMemory), string(v1.ResourceStorage),
   138  		string(v1.ResourceEphemeralStorage), string(apis.NvidiaGPUScalarResourceName))
   139  
   140  	for _, removedResource := range removedDevicePlugins {
   141  		// if the remmovedReousrce is not contained in the nameSet and contains specific tag
   142  		if !nameSet.Has(removedResource) {
   143  			status, err := retrieveDevicePluginStatus(removedResource)
   144  			if err == nil {
   145  				if node.Annotations == nil {
   146  					node.Annotations = make(map[string]string)
   147  				}
   148  				node.Annotations[apis.NvidiaGPUStatusAnnotationKey] = status
   149  				klog.Infof("Setting node annotation to add node status list to Scheduler")
   150  				continue
   151  			}
   152  		}
   153  		klog.Infof("Remove capacity for %s", removedResource)
   154  		delete(node.Status.Capacity, v1.ResourceName(removedResource))
   155  	}
   156  	e.setNodeStatusDaemonEndpoints(nodeStatus)
   157  	e.setNodeStatusConditions(nodeStatus)
   158  	if e.gpuPluginEnabled {
   159  		err := e.setGPUInfo(nodeStatus)
   160  		if err != nil {
   161  			klog.Errorf("setGPUInfo failed, err: %v", err)
   162  		}
   163  	}
   164  	if e.volumeManager.ReconcilerStatesHasBeenSynced() {
   165  		node.Status.VolumesInUse = e.volumeManager.GetVolumesInUse()
   166  	} else {
   167  		node.Status.VolumesInUse = nil
   168  	}
   169  	e.volumeManager.MarkVolumesAsReportedInUse(node.Status.VolumesInUse)
   170  	klog.Infof("Sync VolumesInUse: %v", node.Status.VolumesInUse)
   171  
   172  	return nodeStatus, nil
   173  }
   174  
   175  func (e *edged) setNodeStatusDaemonEndpoints(node *edgeapi.NodeStatusRequest) {
   176  	node.Status.DaemonEndpoints = v1.NodeDaemonEndpoints{
   177  		KubeletEndpoint: v1.DaemonEndpoint{
   178  			Port: config.KubeletPort,
   179  		},
   180  	}
   181  }
   182  
// setNodeStatusConditions fills in the conditions of the status request.
// Only the Ready condition is maintained, via setNodeReadyCondition.
func (e *edged) setNodeStatusConditions(node *edgeapi.NodeStatusRequest) {
	e.setNodeReadyCondition(node)
}
   186  
   187  // setNodeReadyCondition is partially come from "k8s.io/kubernetes/pkg/kubelet.setNodeReadyCondition"
   188  func (e *edged) setNodeReadyCondition(node *edgeapi.NodeStatusRequest) {
   189  	currentTime := metav1.NewTime(time.Now())
   190  	var newNodeReadyCondition v1.NodeCondition
   191  
   192  	var err error
   193  	_, err = e.containerRuntime.Version()
   194  
   195  	if err != nil {
   196  		newNodeReadyCondition = v1.NodeCondition{
   197  			Type:              v1.NodeReady,
   198  			Status:            v1.ConditionFalse,
   199  			Reason:            "EdgeNotReady",
   200  			Message:           err.Error(),
   201  			LastHeartbeatTime: currentTime,
   202  		}
   203  	} else {
   204  		newNodeReadyCondition = v1.NodeCondition{
   205  			Type:              v1.NodeReady,
   206  			Status:            v1.ConditionTrue,
   207  			Reason:            "EdgeReady",
   208  			Message:           "edge is posting ready status",
   209  			LastHeartbeatTime: currentTime,
   210  		}
   211  	}
   212  
   213  	readyConditionUpdated := false
   214  	for i := range node.Status.Conditions {
   215  		if node.Status.Conditions[i].Type == v1.NodeReady {
   216  			if node.Status.Conditions[i].Status == newNodeReadyCondition.Status {
   217  				newNodeReadyCondition.LastTransitionTime = node.Status.Conditions[i].LastTransitionTime
   218  			} else {
   219  				newNodeReadyCondition.LastTransitionTime = currentTime
   220  			}
   221  			node.Status.Conditions[i] = newNodeReadyCondition
   222  			readyConditionUpdated = true
   223  			break
   224  		}
   225  	}
   226  	if !readyConditionUpdated {
   227  		newNodeReadyCondition.LastTransitionTime = currentTime
   228  		node.Status.Conditions = append(node.Status.Conditions, newNodeReadyCondition)
   229  	}
   230  
   231  }
   232  
   233  func (e *edged) getNodeInfo() (v1.NodeSystemInfo, error) {
   234  	nodeInfo := v1.NodeSystemInfo{}
   235  	kernel, err := util.Command("uname", []string{"-r"})
   236  	if err != nil {
   237  		return nodeInfo, err
   238  	}
   239  
   240  	prettyName, err := util.Command("sh", []string{"-c", `cat /etc/os-release | grep PRETTY_NAME| awk -F '"' '{print$2}'`})
   241  	if err != nil {
   242  		return nodeInfo, err
   243  	}
   244  
   245  	runtimeVersion, err := e.containerRuntime.Version()
   246  	if err != nil {
   247  		return nodeInfo, err
   248  	}
   249  	nodeInfo.ContainerRuntimeVersion = fmt.Sprintf("remote://%s", runtimeVersion.String())
   250  
   251  	nodeInfo.KernelVersion = kernel
   252  	nodeInfo.OperatingSystem = runtime.GOOS
   253  	nodeInfo.Architecture = runtime.GOARCH
   254  	nodeInfo.KubeletVersion = e.version
   255  	nodeInfo.OSImage = prettyName
   256  	//nodeInfo.ContainerRuntimeVersion = fmt.Sprintf("docker://%s", runtimeVersion.String())
   257  
   258  	return nodeInfo, nil
   259  
   260  }
   261  
   262  func (e *edged) setGPUInfo(nodeStatus *edgeapi.NodeStatusRequest) error {
   263  	_, err := os.Stat(GPUInfoQueryTool)
   264  	if err != nil {
   265  		return fmt.Errorf("can not get file in path: %s, err: %v", GPUInfoQueryTool, err)
   266  	}
   267  
   268  	nodeStatus.ExtendResources = make(map[v1.ResourceName][]edgeapi.ExtendResource)
   269  
   270  	result, err := util.Command("sh", []string{"-c", fmt.Sprintf("%s -L", GPUInfoQueryTool)})
   271  	if err != nil {
   272  		return err
   273  	}
   274  	re := regexp.MustCompile(`GPU .*:.*\(.*\)`)
   275  	gpuInfos := re.FindAllString(result, -1)
   276  	gpuResources := make([]edgeapi.ExtendResource, 0)
   277  	gpuRegexp := regexp.MustCompile(`^GPU ([\d]+):(.*)\(.*\)`)
   278  	for _, gpuInfo := range gpuInfos {
   279  		params := gpuRegexp.FindStringSubmatch(strings.TrimSpace(gpuInfo))
   280  		if len(params) != 3 {
   281  			klog.Errorf("parse gpu failed, gpuInfo: %v, params: %v", gpuInfo, params)
   282  			continue
   283  		}
   284  		gpuName := params[1]
   285  		gpuType := params[2]
   286  		result, err = util.Command("sh", []string{"-c", fmt.Sprintf("%s -i %s -a|grep -A 3 \"FB Memory Usage\"| grep Total", GPUInfoQueryTool, gpuName)})
   287  		if err != nil {
   288  			klog.Errorf("get gpu(%v) memory failed, err: %v", gpuName, err)
   289  			continue
   290  		}
   291  		parts := strings.Split(result, ":")
   292  		if len(parts) != 2 {
   293  			klog.Errorf("parse gpu(%v) memory failed, parts: %v", gpuName, parts)
   294  			continue
   295  		}
   296  		mem := strings.TrimSpace(strings.Split(strings.TrimSpace(parts[1]), " ")[0])
   297  
   298  		gpuResource := edgeapi.ExtendResource{}
   299  		gpuResource.Name = fmt.Sprintf("nvidia%v", gpuName)
   300  		gpuResource.Type = gpuType
   301  		gpuResource.Capacity = resource.MustParse(mem + "Mi")
   302  		gpuResources = append(gpuResources, gpuResource)
   303  	}
   304  
   305  	nodeStatus.ExtendResources[apis.NvidiaGPUResource] = gpuResources
   306  	return nil
   307  }
   308  
   309  func (e *edged) getIP() (string, error) {
   310  	if nodeIP := config.Config.NodeIP; nodeIP != "" {
   311  		return nodeIP, nil
   312  	}
   313  	hostName, _ := os.Hostname()
   314  	if hostName == "" {
   315  		hostName = e.nodeName
   316  	}
   317  	return util.GetLocalIP(hostName)
   318  }
   319  
   320  func (e *edged) setMemInfo(total, allocated v1.ResourceList) error {
   321  	out, err := ioutil.ReadFile("/proc/meminfo")
   322  	if err != nil {
   323  		return err
   324  	}
   325  	matches := regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`).FindSubmatch(out)
   326  	if len(matches) != 2 {
   327  		return fmt.Errorf("failed to match regexp in output: %q", string(out))
   328  	}
   329  	m, err := strconv.ParseInt(string(matches[1]), 10, 64)
   330  	if err != nil {
   331  		return err
   332  	}
   333  	totalMem := m / 1024
   334  	mem := resource.MustParse(strconv.FormatInt(totalMem, 10) + "Mi")
   335  	total[v1.ResourceMemory] = mem.DeepCopy()
   336  
   337  	if mem.Cmp(reservationMemory) > 0 {
   338  		mem.Sub(reservationMemory)
   339  	}
   340  	allocated[v1.ResourceMemory] = mem.DeepCopy()
   341  
   342  	return nil
   343  }
   344  
   345  func (e *edged) setCPUInfo(total, allocated v1.ResourceList) error {
   346  	total[v1.ResourceCPU] = resource.MustParse(fmt.Sprintf("%d", runtime.NumCPU()))
   347  	allocated[v1.ResourceCPU] = total[v1.ResourceCPU].DeepCopy()
   348  
   349  	return nil
   350  }
   351  
// getDevicePluginResourceCapacity forwards to the container manager's
// GetDevicePluginResourceCapacity. getNodeStatusRequest uses the first result
// as the device plugin capacity and the third as the names of removed device
// plugin resources; the second result is ignored there (presumably the
// allocatable list — confirm against the container manager).
func (e *edged) getDevicePluginResourceCapacity() (v1.ResourceList, v1.ResourceList, []string) {
	return e.containerManager.GetDevicePluginResourceCapacity()
}
   355  
// getNodePhase returns the phase reported for this node, which is always
// v1.NodeRunning.
func (e *edged) getNodePhase() v1.NodePhase {
	return v1.NodeRunning
}
   359  
   360  func (e *edged) registerNode() error {
   361  	node, err := e.initialNode()
   362  	if err != nil {
   363  		klog.Errorf("Unable to construct v1.Node object for edge: %v", err)
   364  		return err
   365  	}
   366  
   367  	e.setInitNode(node)
   368  
   369  	if config.Config.RegisterNode == false {
   370  		//when register-node set to false, do not auto register node
   371  		klog.Infof("register-node is set to false")
   372  		e.registrationCompleted = true
   373  		return nil
   374  	}
   375  
   376  	klog.Infof("Attempting to register node %s", e.nodeName)
   377  
   378  	resource := fmt.Sprintf("%s/%s/%s", e.namespace, model.ResourceTypeNodeStatus, e.nodeName)
   379  	nodeInfoMsg := message.BuildMsg(modules.MetaGroup, "", modules.EdgedModuleName, resource, model.InsertOperation, node)
   380  	res, err := beehiveContext.SendSync(edgehub.ModuleNameEdgeHub, *nodeInfoMsg, syncMsgRespTimeout)
   381  	if err != nil || res.Content != "OK" {
   382  		klog.Errorf("register node failed, error: %v", err)
   383  		return err
   384  	}
   385  
   386  	klog.Infof("Successfully registered node %s", e.nodeName)
   387  	e.registrationCompleted = true
   388  
   389  	return nil
   390  }
   391  
   392  func (e *edged) updateNodeStatus() error {
   393  	nodeStatus, err := e.getNodeStatusRequest(&initNode)
   394  	if err != nil {
   395  		klog.Errorf("Unable to construct api.NodeStatusRequest object for edge: %v", err)
   396  		return err
   397  	}
   398  
   399  	err = e.metaClient.NodeStatus(e.namespace).Update(e.nodeName, *nodeStatus)
   400  	if err != nil {
   401  		klog.Errorf("update node failed, error: %v", err)
   402  	}
   403  	return nil
   404  }
   405  
   406  func (e *edged) syncNodeStatus() {
   407  	if !e.registrationCompleted {
   408  		if err := e.registerNode(); err != nil {
   409  			klog.Errorf("Register node failed: %v", err)
   410  		}
   411  	} else {
   412  		if err := e.updateNodeStatus(); err != nil {
   413  			klog.Errorf("Unable to update node status: %v", err)
   414  		}
   415  	}
   416  }