github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/qrm-plugins/cpu/nativepolicy/policy_test.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package nativepolicy
    18  
    19  import (
    20  	"context"
    21  	"io/ioutil"
    22  	"os"
    23  	"strings"
    24  	"testing"
    25  
    26  	"github.com/stretchr/testify/require"
    27  	v1 "k8s.io/api/core/v1"
    28  	"k8s.io/apimachinery/pkg/util/uuid"
    29  	pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
    30  
    31  	cpuconsts "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/consts"
    32  	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
    33  	"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
    34  	"github.com/kubewharf/katalyst-core/pkg/metrics"
    35  	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    36  )
    37  
    38  const (
    39  	podDebugAnnoKey = "qrm.katalyst.kubewharf.io/debug_pod"
    40  )
    41  
    42  func getTestNativePolicy(topology *machine.CPUTopology, stateFileDirectory string) (*NativePolicy, error) {
    43  	stateImpl, err := state.NewCheckpointState(stateFileDirectory, cpuPluginStateFileName,
    44  		cpuconsts.CPUResourcePluginPolicyNameNative, topology, false)
    45  	if err != nil {
    46  		return nil, err
    47  	}
    48  
    49  	machineInfo := &machine.KatalystMachineInfo{
    50  		CPUTopology: topology,
    51  	}
    52  
    53  	dynamicConfig := dynamic.NewDynamicAgentConfiguration()
    54  
    55  	policyImplement := &NativePolicy{
    56  		machineInfo:      machineInfo,
    57  		emitter:          metrics.DummyMetrics{},
    58  		residualHitMap:   make(map[string]int64),
    59  		cpusToReuse:      make(map[string]machine.CPUSet),
    60  		state:            stateImpl,
    61  		dynamicConfig:    dynamicConfig,
    62  		podDebugAnnoKeys: []string{podDebugAnnoKey},
    63  		reservedCPUs:     machine.NewCPUSet(),
    64  	}
    65  
    66  	state.SetContainerRequestedCores(policyImplement.getContainerRequestedCores)
    67  
    68  	return policyImplement, nil
    69  }
    70  
    71  func TestRemovePod(t *testing.T) {
    72  	t.Parallel()
    73  
    74  	as := require.New(t)
    75  
    76  	tmpDir, err := ioutil.TempDir("", "checkpoint-TestRemovePod")
    77  	as.Nil(err)
    78  	defer func() { _ = os.RemoveAll(tmpDir) }()
    79  
    80  	cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4)
    81  	as.Nil(err)
    82  
    83  	nativePolicy, err := getTestNativePolicy(cpuTopology, tmpDir)
    84  	as.Nil(err)
    85  
    86  	testName := "test"
    87  
    88  	// test for gt
    89  	req := &pluginapi.ResourceRequest{
    90  		PodUid:         string(uuid.NewUUID()),
    91  		PodNamespace:   testName,
    92  		PodName:        testName,
    93  		ContainerName:  testName,
    94  		ContainerType:  pluginapi.ContainerType_MAIN,
    95  		ContainerIndex: 0,
    96  		ResourceName:   string(v1.ResourceCPU),
    97  		ResourceRequests: map[string]float64{
    98  			string(v1.ResourceCPU): 2,
    99  		},
   100  		Labels:         map[string]string{},
   101  		Annotations:    map[string]string{},
   102  		NativeQosClass: string(v1.PodQOSGuaranteed),
   103  	}
   104  
   105  	_, err = nativePolicy.Allocate(context.Background(), req)
   106  	as.Nil(err)
   107  
   108  	resp, err := nativePolicy.GetTopologyAwareResources(context.Background(), &pluginapi.GetTopologyAwareResourcesRequest{
   109  		PodUid:        req.PodUid,
   110  		ContainerName: testName,
   111  	})
   112  	as.Nil(err)
   113  
   114  	expetced := &pluginapi.GetTopologyAwareResourcesResponse{
   115  		PodUid:       req.PodUid,
   116  		PodNamespace: testName,
   117  		PodName:      testName,
   118  		ContainerTopologyAwareResources: &pluginapi.ContainerTopologyAwareResources{
   119  			ContainerName: testName,
   120  			AllocatedResources: map[string]*pluginapi.TopologyAwareResource{
   121  				string(v1.ResourceCPU): {
   122  					IsNodeResource:             false,
   123  					IsScalarResource:           true,
   124  					AggregatedQuantity:         2,
   125  					OriginalAggregatedQuantity: 2,
   126  					TopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{
   127  						{ResourceValue: 2, Node: 0},
   128  					},
   129  					OriginalTopologyAwareQuantityList: []*pluginapi.TopologyAwareQuantity{
   130  						{ResourceValue: 2, Node: 0},
   131  					},
   132  				},
   133  			},
   134  		},
   135  	}
   136  
   137  	as.Equal(expetced, resp)
   138  
   139  	_, _ = nativePolicy.RemovePod(context.Background(), &pluginapi.RemovePodRequest{
   140  		PodUid: req.PodUid,
   141  	})
   142  
   143  	_, err = nativePolicy.GetTopologyAwareResources(context.Background(), &pluginapi.GetTopologyAwareResourcesRequest{
   144  		PodUid:        req.PodUid,
   145  		ContainerName: testName,
   146  	})
   147  	as.NotNil(err)
   148  	as.True(strings.Contains(err.Error(), "is not show up in cpu plugin state"))
   149  }
   150  
   151  func TestGetTopologyHints(t *testing.T) {
   152  	t.Parallel()
   153  
   154  	as := require.New(t)
   155  	cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4)
   156  	as.Nil(err)
   157  
   158  	testName := "test"
   159  
   160  	testCases := []struct {
   161  		description  string
   162  		req          *pluginapi.ResourceRequest
   163  		expectedResp *pluginapi.ResourceHintsResponse
   164  		cpuTopology  *machine.CPUTopology
   165  	}{
   166  		{
   167  			description: "req for init container",
   168  			req: &pluginapi.ResourceRequest{
   169  				PodUid:         string(uuid.NewUUID()),
   170  				PodNamespace:   testName,
   171  				PodName:        testName,
   172  				ContainerName:  testName,
   173  				ContainerType:  pluginapi.ContainerType_INIT,
   174  				ContainerIndex: 0,
   175  				ResourceName:   string(v1.ResourceCPU),
   176  				ResourceRequests: map[string]float64{
   177  					string(v1.ResourceCPU): 2,
   178  				},
   179  				NativeQosClass: string(v1.PodQOSGuaranteed),
   180  			},
   181  			expectedResp: &pluginapi.ResourceHintsResponse{
   182  				PodNamespace:   testName,
   183  				PodName:        testName,
   184  				ContainerName:  testName,
   185  				ContainerType:  pluginapi.ContainerType_INIT,
   186  				ContainerIndex: 0,
   187  				ResourceName:   string(v1.ResourceCPU),
   188  				ResourceHints: map[string]*pluginapi.ListOfTopologyHints{
   189  					string(v1.ResourceCPU): nil,
   190  				},
   191  				NativeQosClass: string(v1.PodQOSGuaranteed),
   192  			},
   193  			cpuTopology: cpuTopology,
   194  		},
   195  		{
   196  			description: "req for guaranteed QoS and integer cores main container",
   197  			req: &pluginapi.ResourceRequest{
   198  				PodUid:         string(uuid.NewUUID()),
   199  				PodNamespace:   testName,
   200  				PodName:        testName,
   201  				ContainerName:  testName,
   202  				ContainerType:  pluginapi.ContainerType_MAIN,
   203  				ContainerIndex: 0,
   204  				ResourceName:   string(v1.ResourceCPU),
   205  				ResourceRequests: map[string]float64{
   206  					string(v1.ResourceCPU): 2,
   207  				},
   208  				NativeQosClass: string(v1.PodQOSGuaranteed),
   209  			},
   210  			expectedResp: &pluginapi.ResourceHintsResponse{
   211  				PodNamespace:   testName,
   212  				PodName:        testName,
   213  				ContainerName:  testName,
   214  				ContainerType:  pluginapi.ContainerType_MAIN,
   215  				ContainerIndex: 0,
   216  				ResourceName:   string(v1.ResourceCPU),
   217  				ResourceHints: map[string]*pluginapi.ListOfTopologyHints{
   218  					string(v1.ResourceCPU): {
   219  						Hints: []*pluginapi.TopologyHint{
   220  							{
   221  								Nodes:     []uint64{0},
   222  								Preferred: true,
   223  							},
   224  							{
   225  								Nodes:     []uint64{1},
   226  								Preferred: true,
   227  							},
   228  							{
   229  								Nodes:     []uint64{2},
   230  								Preferred: true,
   231  							},
   232  							{
   233  								Nodes:     []uint64{3},
   234  								Preferred: true,
   235  							},
   236  							{
   237  								Nodes:     []uint64{0, 1},
   238  								Preferred: false,
   239  							},
   240  							{
   241  								Nodes:     []uint64{0, 2},
   242  								Preferred: false,
   243  							},
   244  							{
   245  								Nodes:     []uint64{0, 3},
   246  								Preferred: false,
   247  							},
   248  							{
   249  								Nodes:     []uint64{1, 2},
   250  								Preferred: false,
   251  							},
   252  							{
   253  								Nodes:     []uint64{1, 3},
   254  								Preferred: false,
   255  							},
   256  							{
   257  								Nodes:     []uint64{2, 3},
   258  								Preferred: false,
   259  							},
   260  							{
   261  								Nodes:     []uint64{0, 1, 2},
   262  								Preferred: false,
   263  							},
   264  							{
   265  								Nodes:     []uint64{0, 1, 3},
   266  								Preferred: false,
   267  							},
   268  							{
   269  								Nodes:     []uint64{0, 2, 3},
   270  								Preferred: false,
   271  							},
   272  							{
   273  								Nodes:     []uint64{1, 2, 3},
   274  								Preferred: false,
   275  							},
   276  							{
   277  								Nodes:     []uint64{0, 1, 2, 3},
   278  								Preferred: false,
   279  							},
   280  						},
   281  					},
   282  				},
   283  				NativeQosClass: string(v1.PodQOSGuaranteed),
   284  			},
   285  			cpuTopology: cpuTopology,
   286  		},
   287  		{
   288  			description: "req for shared pool contaienr",
   289  			req: &pluginapi.ResourceRequest{
   290  				PodUid:         string(uuid.NewUUID()),
   291  				PodNamespace:   testName,
   292  				PodName:        testName,
   293  				ContainerName:  testName,
   294  				ContainerType:  pluginapi.ContainerType_MAIN,
   295  				ContainerIndex: 0,
   296  				ResourceName:   string(v1.ResourceCPU),
   297  				ResourceRequests: map[string]float64{
   298  					string(v1.ResourceCPU): 1.5,
   299  				},
   300  				NativeQosClass: string(v1.PodQOSGuaranteed),
   301  			},
   302  			expectedResp: &pluginapi.ResourceHintsResponse{
   303  				PodNamespace:   testName,
   304  				PodName:        testName,
   305  				ContainerName:  testName,
   306  				ContainerType:  pluginapi.ContainerType_MAIN,
   307  				ContainerIndex: 0,
   308  				ResourceName:   string(v1.ResourceCPU),
   309  				ResourceHints: map[string]*pluginapi.ListOfTopologyHints{
   310  					string(v1.ResourceCPU): nil,
   311  				},
   312  				NativeQosClass: string(v1.PodQOSGuaranteed),
   313  			},
   314  			cpuTopology: cpuTopology,
   315  		},
   316  	}
   317  
   318  	for _, tc := range testCases {
   319  		tmpDir, err := ioutil.TempDir("", "checkpoint-TestGetTopologyHints")
   320  		as.Nil(err)
   321  
   322  		nativePolicy, err := getTestNativePolicy(tc.cpuTopology, tmpDir)
   323  		as.Nil(err)
   324  
   325  		resp, err := nativePolicy.GetTopologyHints(context.Background(), tc.req)
   326  		as.Nil(err)
   327  
   328  		tc.expectedResp.PodUid = tc.req.PodUid
   329  		as.Equalf(tc.expectedResp, resp, "failed in test case: %s", tc.description)
   330  
   331  		_ = os.RemoveAll(tmpDir)
   332  	}
   333  }
   334  
   335  func TestGetReadonlyState(t *testing.T) {
   336  	t.Parallel()
   337  
   338  	as := require.New(t)
   339  	readonlyState, err := GetReadonlyState()
   340  	as.NotNil(err)
   341  	as.Nil(readonlyState)
   342  }
   343  
   344  func TestClearResidualState(t *testing.T) {
   345  	t.Parallel()
   346  
   347  	as := require.New(t)
   348  
   349  	tmpDir, err := ioutil.TempDir("", "checkpoint_TestClearResidualState")
   350  	as.Nil(err)
   351  	defer os.RemoveAll(tmpDir)
   352  
   353  	cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4)
   354  	as.Nil(err)
   355  
   356  	dynamicPolicy, err := getTestNativePolicy(cpuTopology, tmpDir)
   357  	as.Nil(err)
   358  
   359  	dynamicPolicy.clearResidualState()
   360  }
   361  
   362  func TestStart(t *testing.T) {
   363  	t.Parallel()
   364  
   365  	as := require.New(t)
   366  
   367  	tmpDir, err := ioutil.TempDir("", "checkpoint_TestStart")
   368  	as.Nil(err)
   369  	defer os.RemoveAll(tmpDir)
   370  
   371  	cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4)
   372  	as.Nil(err)
   373  
   374  	dynamicPolicy, err := getTestNativePolicy(cpuTopology, tmpDir)
   375  	as.Nil(err)
   376  
   377  	err = dynamicPolicy.Start()
   378  	as.Nil(err)
   379  }
   380  
   381  func TestStop(t *testing.T) {
   382  	t.Parallel()
   383  
   384  	as := require.New(t)
   385  
   386  	tmpDir, err := ioutil.TempDir("", "checkpoint_TestStop")
   387  	as.Nil(err)
   388  	defer os.RemoveAll(tmpDir)
   389  
   390  	cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4)
   391  	as.Nil(err)
   392  
   393  	dynamicPolicy, err := getTestNativePolicy(cpuTopology, tmpDir)
   394  	as.Nil(err)
   395  
   396  	err = dynamicPolicy.Stop()
   397  	as.Nil(err)
   398  }