github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/resourcemanager/fetcher/kubelet/kubeletplugin.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package kubelet
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"sync"
    24  	"time"
    25  
    26  	info "github.com/google/cadvisor/info/v1"
    27  	"github.com/pkg/errors"
    28  	"go.uber.org/atomic"
    29  	"k8s.io/klog/v2"
    30  	apiconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
    31  
    32  	nodev1alpha1 "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1"
    33  	"github.com/kubewharf/katalyst-api/pkg/protocol/reporterplugin/v1alpha1"
    34  	"github.com/kubewharf/katalyst-api/pkg/utils"
    35  	"github.com/kubewharf/katalyst-core/pkg/agent/resourcemanager/fetcher/kubelet/topology"
    36  	"github.com/kubewharf/katalyst-core/pkg/agent/resourcemanager/fetcher/plugin"
    37  	"github.com/kubewharf/katalyst-core/pkg/config"
    38  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    39  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    40  	"github.com/kubewharf/katalyst-core/pkg/util"
    41  	"github.com/kubewharf/katalyst-core/pkg/util/kubelet/podresources"
    42  	"github.com/kubewharf/katalyst-core/pkg/util/process"
    43  )
    44  
    // PluginName is the name of the kubelet reporter plugin. It is the value
    // returned by Name and the plugin name handed to the ListAndWatch callback.
    const PluginName = "kubelet-reporter-plugin"
    49  
    50  // kubeletPlugin is an in-tree reporter plugin (the constructor returns it as a plugin.ReporterPlugin) that reports topology data read from the topology adapter
    51  type kubeletPlugin struct {
    52  	mutex sync.RWMutex // write-locked by Stop, read-locked by topologyStatusChangeHandler around notifierCh
    53  
    54  	ctx    context.Context    // handed to the topology adapter's Run and watched by the Run loop
    55  	cancel context.CancelFunc // cancels ctx; invoked by Stop
    56  
    57  	// conf supplies the settings read by this plugin: the parameters used to build the
    58  	// topology adapter, EnableReportTopologyPolicy and TopologyPolicyName
    59  	conf *config.Configuration
    60  
    61  	topologyStatusAdapter topology.Adapter // source of topology zones and of the topology policy
    62  
    63  	// cb is invoked by ListAndWatchReportContentCallback with each new report content
    64  	// response, right after that response has been cached
    65  	cb plugin.ListAndWatchCallback
    66  
    67  	// notifierCh carries change notifications from topologyStatusChangeHandler to the
    68  	// Run loop; it is created with a buffer of 10 and closed by Stop
    69  	notifierCh chan struct{}
    70  
    71  	latestReportContentResponse atomic.Value // last response stored by setCache; read by GetCache
    72  
    73  	*process.StopControl // embedded; its Stop is called from kubeletPlugin.Stop
    74  	emitter    metrics.MetricEmitter // stored by the constructor; not referenced elsewhere in this file
    75  	metaServer *metaserver.MetaServer // provides MachineInfo.Topology for getNumaInfo
    76  }
    77  
    78  // NewKubeletReporterPlugin creates a kubelet reporter plugin
    79  func NewKubeletReporterPlugin(emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer,
    80  	conf *config.Configuration, callback plugin.ListAndWatchCallback,
    81  ) (plugin.ReporterPlugin, error) {
    82  	ctx, cancel := context.WithCancel(context.Background())
    83  	p := &kubeletPlugin{
    84  		emitter:     emitter,
    85  		metaServer:  metaServer,
    86  		conf:        conf,
    87  		notifierCh:  make(chan struct{}, 10),
    88  		ctx:         ctx,
    89  		cancel:      cancel,
    90  		cb:          callback,
    91  		StopControl: process.NewStopControl(time.Time{}),
    92  	}
    93  
    94  	topologyStatusAdapter, err := topology.NewPodResourcesServerTopologyAdapter(metaServer, conf.QoSConfiguration,
    95  		conf.PodResourcesServerEndpoints, conf.KubeletResourcePluginPaths, conf.ResourceNameToZoneTypeMap,
    96  		nil, p.getNumaInfo, topology.GenericPodResourcesFilter(conf.QoSConfiguration), podresources.GetV1Client,
    97  		conf.NeedValidationResources)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  
   102  	p.topologyStatusAdapter = topologyStatusAdapter
   103  
   104  	return p, nil
   105  }
   106  
   // Name returns PluginName, the fixed name of this plugin.
   107  func (p *kubeletPlugin) Name() string {
   108  	return PluginName
   109  }
   110  
   111  func (p *kubeletPlugin) Run(success chan<- bool) {
   112  	err := p.topologyStatusAdapter.Run(p.ctx, p.topologyStatusChangeHandler)
   113  	if err != nil {
   114  		klog.Fatalf("run topology status adapter failed: %v", err)
   115  		return
   116  	}
   117  	success <- true
   118  
   119  	for {
   120  		select {
   121  		case _, ok := <-p.notifierCh:
   122  			if !ok {
   123  				klog.Infof("plugin %s has been stopped", PluginName)
   124  				return
   125  			}
   126  
   127  			resp, err := p.getReportContent(p.ctx)
   128  			if err != nil {
   129  				klog.Errorf("plugin %s failed to get report content with error %v", PluginName, err)
   130  				continue
   131  			}
   132  
   133  			p.ListAndWatchReportContentCallback(PluginName, resp)
   134  		case <-p.ctx.Done():
   135  			klog.Infof("plugin %s has been stopped", PluginName)
   136  			return
   137  		}
   138  	}
   139  }
   140  
   // GetReportContent builds and returns the current report content response by
   // delegating to getReportContent with the caller's ctx.
   141  func (p *kubeletPlugin) GetReportContent(ctx context.Context) (*v1alpha1.GetReportContentResponse, error) {
   142  	return p.getReportContent(ctx)
   143  }
   144  
   145  func (p *kubeletPlugin) ListAndWatchReportContentCallback(pluginName string, response *v1alpha1.GetReportContentResponse) {
   146  	p.setCache(response)
   147  
   148  	p.cb(pluginName, response)
   149  }
   150  
   151  func (p *kubeletPlugin) GetCache() *v1alpha1.GetReportContentResponse {
   152  	resp := p.latestReportContentResponse.Load()
   153  	if resp == nil {
   154  		return nil
   155  	}
   156  
   157  	return resp.(*v1alpha1.GetReportContentResponse)
   158  }
   159  
   160  // Stop to cancel all context and close notifierCh
   161  func (p *kubeletPlugin) Stop() {
   162  	p.mutex.Lock()
   163  	defer p.mutex.Unlock()
   164  
   165  	p.cancel()
   166  	close(p.notifierCh)
   167  
   168  	p.StopControl.Stop()
   169  }
   170  
   171  // topologyStatusChangeHandler is called by topology adapter when topology status changes
   172  func (p *kubeletPlugin) topologyStatusChangeHandler() {
   173  	p.mutex.RLock()
   174  	defer p.mutex.RUnlock()
   175  
   176  	select {
   177  	case p.notifierCh <- struct{}{}:
   178  		klog.Infof("send topology change notification to plugin %s", PluginName)
   179  	default:
   180  		klog.Warningf("plugin %s is busy, skip topology change notification", PluginName)
   181  	}
   182  }
   183  
   // setCache stores resp as the latest report content response, which is what
   // GetCache returns afterwards.
   184  func (p *kubeletPlugin) setCache(resp *v1alpha1.GetReportContentResponse) {
   185  	p.latestReportContentResponse.Store(resp)
   186  }
   187  
   188  // getReportContent get report content from all collectors
   189  func (p *kubeletPlugin) getReportContent(ctx context.Context) (*v1alpha1.GetReportContentResponse, error) {
   190  	reportContent, err := p.getTopologyStatusContent(ctx)
   191  	if err != nil {
   192  		return nil, err
   193  	}
   194  
   195  	return &v1alpha1.GetReportContentResponse{
   196  		Content: reportContent,
   197  	}, nil
   198  }
   199  
   200  // getTopologyStatusContent get topology status content from topologyStatusAdapter
   201  func (p *kubeletPlugin) getTopologyStatusContent(ctx context.Context) ([]*v1alpha1.ReportContent, error) {
   202  	topologyStatus, err := p.topologyStatusAdapter.GetTopologyZones(ctx)
   203  	if err != nil {
   204  		return nil, errors.Wrap(err, "get numa topology status from adapter failed")
   205  	}
   206  
   207  	value, err := json.Marshal(&topologyStatus)
   208  	if err != nil {
   209  		return nil, errors.Wrap(err, "marshal topology status failed")
   210  	}
   211  
   212  	topologyStatusContent := []*v1alpha1.ReportContent{
   213  		{
   214  			GroupVersionKind: &util.CNRGroupVersionKind,
   215  			Field: []*v1alpha1.ReportField{
   216  				{
   217  					FieldType: v1alpha1.FieldType_Status,
   218  					FieldName: util.CNRFieldNameTopologyZone,
   219  					Value:     value,
   220  				},
   221  			},
   222  		},
   223  	}
   224  
   225  	if p.conf.EnableReportTopologyPolicy {
   226  		content, err := p.getTopologyPolicyReportContent(ctx)
   227  		if err != nil {
   228  			return nil, errors.Wrap(err, "get topology policy report content failed")
   229  		}
   230  		topologyStatusContent = append(topologyStatusContent, content)
   231  	}
   232  
   233  	return topologyStatusContent, nil
   234  }
   235  
   236  func (p *kubeletPlugin) getNumaInfo() ([]info.Node, error) {
   237  	if p.metaServer == nil || p.metaServer.MachineInfo == nil {
   238  		return nil, fmt.Errorf("get metaserver machine info is nil")
   239  	}
   240  	return p.metaServer.MachineInfo.Topology, nil
   241  }
   242  
   243  func (p *kubeletPlugin) getTopologyPolicyReportContent(ctx context.Context) (*v1alpha1.ReportContent, error) {
   244  	var (
   245  		topologyPolicy nodev1alpha1.TopologyPolicy
   246  		err            error
   247  	)
   248  
   249  	if p.reportOrmTopologyPolicy() {
   250  		// report orm topology policy only if orm is explicitly enabled in the configuration.
   251  		topologyPolicy = utils.GenerateTopologyPolicy(p.conf.TopologyPolicyName, apiconfig.ContainerTopologyManagerScope)
   252  	} else {
   253  		topologyPolicy, err = p.topologyStatusAdapter.GetTopologyPolicy(ctx)
   254  		if err != nil {
   255  			return nil, errors.Wrap(err, "get topology policy from adapter failed")
   256  		}
   257  	}
   258  
   259  	valueTopologyPolicy, err := json.Marshal(&topologyPolicy)
   260  	if err != nil {
   261  		return nil, errors.Wrap(err, "marshal topology policy failed")
   262  	}
   263  
   264  	return &v1alpha1.ReportContent{
   265  		GroupVersionKind: &util.CNRGroupVersionKind,
   266  		Field: []*v1alpha1.ReportField{
   267  			{
   268  				FieldType: v1alpha1.FieldType_Status,
   269  				FieldName: util.CNRFieldNameTopologyPolicy,
   270  				Value:     valueTopologyPolicy,
   271  			},
   272  		},
   273  	}, nil
   274  }
   275  
   276  func (p *kubeletPlugin) reportOrmTopologyPolicy() bool {
   277  	if p.conf.TopologyPolicyName == "" {
   278  		return false
   279  	}
   280  
   281  	return true
   282  }