github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/resourcemanager/fetcher/system/systemplugin.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package system
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/pkg/errors"
    27  	v1 "k8s.io/api/core/v1"
    28  	"k8s.io/apimachinery/pkg/api/resource"
    29  	"k8s.io/klog/v2"
    30  
    31  	nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1"
    32  	"github.com/kubewharf/katalyst-api/pkg/protocol/reporterplugin/v1alpha1"
    33  	"github.com/kubewharf/katalyst-core/pkg/agent/resourcemanager/fetcher/plugin"
    34  	pluginutil "github.com/kubewharf/katalyst-core/pkg/agent/resourcemanager/fetcher/util"
    35  	"github.com/kubewharf/katalyst-core/pkg/config"
    36  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    37  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    38  	"github.com/kubewharf/katalyst-core/pkg/util"
    39  	"github.com/kubewharf/katalyst-core/pkg/util/process"
    40  )
    41  
const (
	// PluginName is the name returned by systemPlugin.Name.
	PluginName = "system-reporter-plugin"

	// ResourceNameNBW is the resource name under which getNetworkBandwidth
	// reports the maximum speed among enabled network interfaces.
	ResourceNameNBW v1.ResourceName = "nbw"

	// PropertyNameCIS is the property name used by getCISProperty for the
	// values listed by the meta server's SupportInstructionSet.
	PropertyNameCIS = "cis"
	// PropertyNameNUMA is the property name used by getNUMACount for the
	// number of NUMA nodes.
	PropertyNameNUMA = "numa"
	// PropertyNameTopology is the property name used by
	// getNetworkTopologyProperty for the per-interface network info.
	PropertyNameTopology = "topology"
)
    51  
// systemPlugin implements the endpoint interface, and it's an in-tree reporter plugin.
// It reports machine-level properties (NUMA count, network bandwidth, CPU count,
// memory capacity, CIS values and network topology) read from the meta server.
type systemPlugin struct {
	// conf is used to indicate the file path and name for system data in the future
	// currently, it's not used todo: implement this logic
	conf *config.Configuration

	// mutex guards latestReportContentResponse, which holds the response built
	// by the most recent successful GetReportContent call (nil before that).
	mutex                       sync.Mutex
	latestReportContentResponse *v1alpha1.GetReportContentResponse

	// StopControl is embedded and created in NewSystemReporterPlugin; the
	// methods in this file do not consult it.
	// emitter is stored at construction time and not used in this file.
	// metaServer is the source of every reported value (CPUTopology,
	// ExtraNetworkInfo, MachineInfo and SupportInstructionSet).
	*process.StopControl
	emitter    metrics.MetricEmitter
	metaServer *metaserver.MetaServer
}
    65  
    66  func NewSystemReporterPlugin(emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer,
    67  	conf *config.Configuration, _ plugin.ListAndWatchCallback,
    68  ) (plugin.ReporterPlugin, error) {
    69  	p := &systemPlugin{
    70  		conf:        conf,
    71  		emitter:     emitter,
    72  		metaServer:  metaServer,
    73  		StopControl: process.NewStopControl(time.Time{}),
    74  	}
    75  
    76  	return p, nil
    77  }
    78  
// Name returns the constant plugin name, PluginName.
func (p *systemPlugin) Name() string {
	return PluginName
}
    82  
// Run sends true on the success channel and then blocks forever; it never
// returns, so callers are expected to invoke it from its own goroutine.
// The send itself blocks until the value can be delivered.
func (p *systemPlugin) Run(success chan<- bool) {
	success <- true
	// empty select: park this goroutine permanently
	select {}
}
    87  
    88  func (p *systemPlugin) GetReportContent(_ context.Context) (*v1alpha1.GetReportContentResponse, error) {
    89  	content, err := pluginutil.AppendReportContent(
    90  		p.getResourceProperties,
    91  	)
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  
    96  	resp := &v1alpha1.GetReportContentResponse{
    97  		Content: content,
    98  	}
    99  
   100  	p.setCache(resp)
   101  
   102  	return resp, nil
   103  }
   104  
// ListAndWatchReportContentCallback is intentionally a no-op: both arguments
// are ignored and nothing is done with the pushed content.
func (p *systemPlugin) ListAndWatchReportContentCallback(_ string, _ *v1alpha1.GetReportContentResponse) {
}
   107  
   108  func (p *systemPlugin) GetCache() *v1alpha1.GetReportContentResponse {
   109  	p.mutex.Lock()
   110  	defer p.mutex.Unlock()
   111  
   112  	return p.latestReportContentResponse
   113  }
   114  
   115  func (p *systemPlugin) setCache(resp *v1alpha1.GetReportContentResponse) {
   116  	p.mutex.Lock()
   117  	defer p.mutex.Unlock()
   118  
   119  	p.latestReportContentResponse = resp
   120  }
   121  
   122  func (p *systemPlugin) getResourceProperties() ([]*v1alpha1.ReportContent, error) {
   123  	var properties []*nodev1alpha1.Property
   124  
   125  	// append all properties to one property list
   126  	properties = append(properties,
   127  		p.getNUMACount(),
   128  		p.getNetworkBandwidth(),
   129  		p.getCPUCount(),
   130  		p.getMemoryCapacity(),
   131  		p.getCISProperty(),
   132  		p.getNetworkTopologyProperty(),
   133  	)
   134  
   135  	value, err := json.Marshal(&properties)
   136  	if err != nil {
   137  		return nil, errors.Wrap(err, "marshal resource properties failed")
   138  	}
   139  
   140  	return []*v1alpha1.ReportContent{
   141  		{
   142  			GroupVersionKind: &util.CNRGroupVersionKind,
   143  			Field: []*v1alpha1.ReportField{
   144  				{
   145  					FieldType: v1alpha1.FieldType_Spec,
   146  					FieldName: util.CNRFieldNameNodeResourceProperties,
   147  					Value:     value,
   148  				},
   149  			},
   150  		},
   151  	}, nil
   152  }
   153  
   154  // getNUMACount get numa count of this machine.
   155  func (p *systemPlugin) getNUMACount() *nodev1alpha1.Property {
   156  	return &nodev1alpha1.Property{
   157  		PropertyName:     PropertyNameNUMA,
   158  		PropertyQuantity: resource.NewQuantity(int64(p.metaServer.CPUTopology.NumNUMANodes), resource.DecimalSI),
   159  	}
   160  }
   161  
   162  // getNetworkBandwidth get max network bandwidth of all the interfaces in this machine.
   163  func (p *systemPlugin) getNetworkBandwidth() *nodev1alpha1.Property {
   164  	// check all interface, save max speed of all enabled interfaces
   165  	max := -1
   166  	for _, net := range p.metaServer.ExtraNetworkInfo.Interface {
   167  		if net.Enable && net.Speed > max {
   168  			max = net.Speed
   169  		}
   170  	}
   171  
   172  	return &nodev1alpha1.Property{
   173  		PropertyName:     fmt.Sprintf("%v", ResourceNameNBW),
   174  		PropertyQuantity: resource.NewQuantity(int64(max), resource.DecimalSI),
   175  	}
   176  }
   177  
   178  // getCPUCount get cpu count of this machine.
   179  func (p *systemPlugin) getCPUCount() *nodev1alpha1.Property {
   180  	return &nodev1alpha1.Property{
   181  		PropertyName:     fmt.Sprintf("%v", v1.ResourceCPU),
   182  		PropertyQuantity: resource.NewQuantity(int64(p.metaServer.MachineInfo.NumCores), resource.DecimalSI),
   183  	}
   184  }
   185  
   186  // getMemoryCapacity get memory capacity of this machine.
   187  func (p *systemPlugin) getMemoryCapacity() *nodev1alpha1.Property {
   188  	return &nodev1alpha1.Property{
   189  		PropertyName:     fmt.Sprintf("%v", v1.ResourceMemory),
   190  		PropertyQuantity: resource.NewQuantity(int64(p.metaServer.MachineInfo.MemoryCapacity), resource.BinarySI),
   191  	}
   192  }
   193  
   194  func (p *systemPlugin) getCISProperty() *nodev1alpha1.Property {
   195  	return &nodev1alpha1.Property{
   196  		PropertyName:   PropertyNameCIS,
   197  		PropertyValues: p.metaServer.SupportInstructionSet.List(),
   198  	}
   199  }
   200  
   201  // getNetworkTopologyProperty get network interface info of each interface in this machine.
   202  func (p *systemPlugin) getNetworkTopologyProperty() *nodev1alpha1.Property {
   203  	propertyValues := make([]string, 0, len(p.metaServer.ExtraNetworkInfo.Interface))
   204  
   205  	// construct property values for each interface, each interface with
   206  	// one property value
   207  	for _, net := range p.metaServer.ExtraNetworkInfo.Interface {
   208  		netBytes, err := json.Marshal(net)
   209  		if err != nil {
   210  			klog.Warningf("marshal network info failed: %s", err)
   211  			return nil
   212  		}
   213  
   214  		propertyValues = append(propertyValues, string(netBytes))
   215  	}
   216  
   217  	return &nodev1alpha1.Property{
   218  		PropertyName:   PropertyNameTopology,
   219  		PropertyValues: propertyValues,
   220  	}
   221  }