github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/io/staticpolicy/policy.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package staticpolicy
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	"k8s.io/apimachinery/pkg/util/wait"
    26  	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    27  
    28  	"github.com/kubewharf/katalyst-api/pkg/consts"
    29  	"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent"
    30  	"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent/qrm"
    31  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/io/handlers/dirtymem"
    32  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/io/handlers/iocost"
    33  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/io/handlers/ioweight"
    34  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
    35  	"github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler"
    36  	"github.com/kubewharf/katalyst-core/pkg/config"
    37  	"github.com/kubewharf/katalyst-core/pkg/config/generic"
    38  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    39  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    40  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    41  )
    42  
    43  const (
    44  	// IOResourcePluginPolicyNameStatic is the policy name of static io resource plugin
    45  	IOResourcePluginPolicyNameStatic = string(consts.ResourcePluginPolicyNameStatic)
    46  )
    47  
    48  // StaticPolicy is the static io policy
    49  type StaticPolicy struct {
    50  	sync.Mutex
    51  
    52  	name      string
    53  	stopCh    chan struct{}
    54  	started   bool
    55  	qosConfig *generic.QoSConfiguration
    56  
    57  	emitter    metrics.MetricEmitter
    58  	metaServer *metaserver.MetaServer
    59  	agentCtx   *agent.GenericContext
    60  
    61  	enableSettingWBT      bool
    62  	enableSettingIOWeight bool
    63  }
    64  
    65  // NewStaticPolicy returns a static io policy
    66  func NewStaticPolicy(agentCtx *agent.GenericContext, conf *config.Configuration,
    67  	_ interface{}, agentName string,
    68  ) (bool, agent.Component, error) {
    69  	wrappedEmitter := agentCtx.EmitterPool.GetDefaultMetricsEmitter().WithTags(agentName, metrics.MetricTag{
    70  		Key: util.QRMPluginPolicyTagName,
    71  		Val: IOResourcePluginPolicyNameStatic,
    72  	})
    73  
    74  	policyImplement := &StaticPolicy{
    75  		emitter:               wrappedEmitter,
    76  		metaServer:            agentCtx.MetaServer,
    77  		agentCtx:              agentCtx,
    78  		stopCh:                make(chan struct{}),
    79  		name:                  fmt.Sprintf("%s_%s", agentName, IOResourcePluginPolicyNameStatic),
    80  		qosConfig:             conf.QoSConfiguration,
    81  		enableSettingWBT:      conf.EnableSettingWBT,
    82  		enableSettingIOWeight: conf.EnableSettingIOWeight,
    83  	}
    84  
    85  	// todo: currently there is no resource needed to be topology-aware and synchronously allocated in this plugin,
    86  	// so not to wrap the plugin by RegistrationPluginWrapper and it won't be registered to QRM framework temporarily.
    87  
    88  	return true, &agent.PluginWrapper{GenericPlugin: policyImplement}, nil
    89  }
    90  
    91  // Start starts this plugin
    92  func (p *StaticPolicy) Start() (err error) {
    93  	general.Infof("called")
    94  
    95  	p.Lock()
    96  	defer func() {
    97  		if !p.started {
    98  			if err == nil {
    99  				p.started = true
   100  			} else {
   101  				close(p.stopCh)
   102  			}
   103  		}
   104  		p.Unlock()
   105  	}()
   106  
   107  	if p.started {
   108  		general.Infof("already started")
   109  		return nil
   110  	}
   111  
   112  	p.stopCh = make(chan struct{})
   113  
   114  	go wait.Until(func() {
   115  		_ = p.emitter.StoreInt64(util.MetricNameHeartBeat, 1, metrics.MetricTypeNameRaw)
   116  	}, time.Second*30, p.stopCh)
   117  
   118  	if p.enableSettingIOWeight {
   119  		err = periodicalhandler.RegisterPeriodicalHandler(qrm.QRMIOPluginPeriodicalHandlerGroupName,
   120  			ioweight.EnableSetIOWeightPeriodicalHandlerName, ioweight.IOWeightTaskFunc, 30*time.Second)
   121  		if err != nil {
   122  			general.Infof("register syncIOWeight failed, err=%v", err)
   123  		}
   124  	}
   125  	if p.enableSettingWBT {
   126  		general.Infof("setWBT enabled")
   127  		err := periodicalhandler.RegisterPeriodicalHandler(qrm.QRMIOPluginPeriodicalHandlerGroupName,
   128  			dirtymem.EnableSetDirtyMemPeriodicalHandlerName, dirtymem.SetDirtyMem, 300*time.Second)
   129  		if err != nil {
   130  			general.Infof("setSockMem failed, err=%v", err)
   131  		}
   132  	}
   133  
   134  	// Notice: iocost.SetIOCost will check the featuregate.
   135  	// If conf.EnableSettingIOCost was disabled,
   136  	// iocost.SetIOCost will disable all the io.cost related functions in host.
   137  	general.Infof("setIOCost handler started")
   138  	err = periodicalhandler.RegisterPeriodicalHandler(qrm.QRMIOPluginPeriodicalHandlerGroupName,
   139  		iocost.EnableSetIOCostPeriodicalHandlerName, iocost.SetIOCost, 300*time.Second)
   140  	if err != nil {
   141  		general.Infof("setIOCost failed, err=%v", err)
   142  	}
   143  
   144  	go wait.Until(func() {
   145  		periodicalhandler.ReadyToStartHandlersByGroup(qrm.QRMIOPluginPeriodicalHandlerGroupName)
   146  	}, 5*time.Second, p.stopCh)
   147  	return nil
   148  }
   149  
   150  // Stop stops this plugin
   151  func (p *StaticPolicy) Stop() error {
   152  	p.Lock()
   153  	defer func() {
   154  		p.started = false
   155  		p.Unlock()
   156  		general.Infof("stopped")
   157  	}()
   158  
   159  	if !p.started {
   160  		general.Warningf("already stopped")
   161  		return nil
   162  	}
   163  
   164  	close(p.stopCh)
   165  
   166  	periodicalhandler.StopHandlersByGroup(qrm.QRMIOPluginPeriodicalHandlerGroupName)
   167  	return nil
   168  }
   169  
   170  // Name returns the name of this plugin
   171  func (p *StaticPolicy) Name() string {
   172  	return p.name
   173  }
   174  
   175  // ResourceName returns resource names managed by this plugin
   176  func (p *StaticPolicy) ResourceName() string {
   177  	// todo: return correct value when there is resource needed to be topology-aware and synchronously allocated in this plugin
   178  	return ""
   179  }
   180  
   181  // GetTopologyHints returns hints of corresponding resources
   182  func (p *StaticPolicy) GetTopologyHints(_ context.Context,
   183  	req *pluginapi.ResourceRequest,
   184  ) (resp *pluginapi.ResourceHintsResponse, err error) {
   185  	if req == nil {
   186  		return nil, fmt.Errorf("GetTopologyHints got nil req")
   187  	}
   188  
   189  	return util.PackResourceHintsResponse(req, p.ResourceName(), nil)
   190  }
   191  
   192  func (p *StaticPolicy) RemovePod(_ context.Context,
   193  	req *pluginapi.RemovePodRequest,
   194  ) (*pluginapi.RemovePodResponse, error) {
   195  	if req == nil {
   196  		return nil, fmt.Errorf("RemovePod got nil req")
   197  	}
   198  
   199  	return &pluginapi.RemovePodResponse{}, nil
   200  }
   201  
   202  // GetResourcesAllocation returns allocation results of corresponding resources
   203  func (p *StaticPolicy) GetResourcesAllocation(_ context.Context,
   204  	_ *pluginapi.GetResourcesAllocationRequest,
   205  ) (*pluginapi.GetResourcesAllocationResponse, error) {
   206  	return &pluginapi.GetResourcesAllocationResponse{}, nil
   207  }
   208  
   209  // GetTopologyAwareResources returns allocation results of corresponding resources as topology aware format
   210  func (p *StaticPolicy) GetTopologyAwareResources(_ context.Context,
   211  	_ *pluginapi.GetTopologyAwareResourcesRequest,
   212  ) (*pluginapi.GetTopologyAwareResourcesResponse, error) {
   213  	return &pluginapi.GetTopologyAwareResourcesResponse{}, nil
   214  }
   215  
   216  // GetTopologyAwareAllocatableResources returns corresponding allocatable resources as topology aware format
   217  func (p *StaticPolicy) GetTopologyAwareAllocatableResources(_ context.Context,
   218  	_ *pluginapi.GetTopologyAwareAllocatableResourcesRequest,
   219  ) (*pluginapi.GetTopologyAwareAllocatableResourcesResponse, error) {
   220  	return &pluginapi.GetTopologyAwareAllocatableResourcesResponse{}, nil
   221  }
   222  
   223  // GetResourcePluginOptions returns options to be communicated with Resource Manager
   224  func (p *StaticPolicy) GetResourcePluginOptions(context.Context,
   225  	*pluginapi.Empty,
   226  ) (*pluginapi.ResourcePluginOptions, error) {
   227  	return &pluginapi.ResourcePluginOptions{
   228  		PreStartRequired:      false,
   229  		WithTopologyAlignment: false,
   230  		NeedReconcile:         false,
   231  	}, nil
   232  }
   233  
   234  // Allocate is called during pod admit so that the resource
   235  // plugin can allocate corresponding resource for the container
   236  // according to resource request
   237  func (p *StaticPolicy) Allocate(_ context.Context,
   238  	req *pluginapi.ResourceRequest,
   239  ) (resp *pluginapi.ResourceAllocationResponse, err error) {
   240  	if req == nil {
   241  		return nil, fmt.Errorf("GetTopologyHints got nil req")
   242  	}
   243  
   244  	return &pluginapi.ResourceAllocationResponse{
   245  		PodUid:         req.PodUid,
   246  		PodNamespace:   req.PodNamespace,
   247  		PodName:        req.PodName,
   248  		ContainerName:  req.ContainerName,
   249  		ContainerType:  req.ContainerType,
   250  		ContainerIndex: req.ContainerIndex,
   251  		PodRole:        req.PodRole,
   252  		PodType:        req.PodType,
   253  		ResourceName:   p.ResourceName(),
   254  		Labels:         general.DeepCopyMap(req.Labels),
   255  		Annotations:    general.DeepCopyMap(req.Annotations),
   256  	}, nil
   257  }
   258  
   259  // PreStartContainer is called, if indicated by resource plugin during registration phase,
   260  // before each container start. Resource plugin can run resource specific operations
   261  // such as resetting the resource before making resources available to the container
   262  func (p *StaticPolicy) PreStartContainer(context.Context,
   263  	*pluginapi.PreStartContainerRequest,
   264  ) (*pluginapi.PreStartContainerResponse, error) {
   265  	return &pluginapi.PreStartContainerResponse{}, nil
   266  }