volcano.sh/volcano@v1.9.0/pkg/scheduler/api/node_info_test.go (about) 1 /* 2 Copyright 2021 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package api 18 19 import ( 20 "reflect" 21 "testing" 22 23 v1 "k8s.io/api/core/v1" 24 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 25 k8sframework "k8s.io/kubernetes/pkg/scheduler/framework" 26 27 "volcano.sh/volcano/pkg/scheduler/api/devices/nvidia/gpushare" 28 "volcano.sh/volcano/pkg/scheduler/api/devices/nvidia/vgpu" 29 ) 30 31 func nodeInfoEqual(l, r *NodeInfo) bool { 32 return reflect.DeepEqual(l, r) 33 } 34 35 func TestNodeInfo_AddPod(t *testing.T) { 36 // case1 37 case01Node := buildNode("n1", BuildResourceList("8000m", "10G", []ScalarResource{{Name: "pods", Value: "20"}}...)) 38 case01Pod1 := buildPod("c1", "p1", "n1", v1.PodRunning, BuildResourceList("1000m", "1G"), []metav1.OwnerReference{}, make(map[string]string)) 39 case01Pod2 := buildPod("c1", "p2", "n1", v1.PodRunning, BuildResourceList("2000m", "2G"), []metav1.OwnerReference{}, make(map[string]string)) 40 // case2 41 case02Node := buildNode("n2", BuildResourceList("2000m", "1G", []ScalarResource{{Name: "pods", Value: "20"}}...)) 42 case02Pod1 := buildPod("c2", "p1", "n2", v1.PodUnknown, BuildResourceList("1000m", "2G"), []metav1.OwnerReference{}, make(map[string]string)) 43 44 tests := []struct { 45 name string 46 node *v1.Node 47 pods []*v1.Pod 48 expected *NodeInfo 49 expectedFailure bool 50 }{ 51 { 52 name: "add 2 running non-owner pod", 53 node: case01Node, 54 pods: []*v1.Pod{case01Pod1, case01Pod2}, 55 expected: &NodeInfo{ 56 Name: "n1", 57 Node: case01Node, 58 Idle: buildResource("5000m", "7G", map[string]string{"pods": "18"}, 20), 59 Used: buildResource("3000m", "3G", map[string]string{"pods": "2"}, 0), 60 Releasing: EmptyResource(), 61 Pipelined: EmptyResource(), 62 OversubscriptionResource: EmptyResource(), 63 Allocatable: buildResource("8000m", "10G", map[string]string{"pods": "20"}, 20), 64 Capacity: buildResource("8000m", "10G", map[string]string{"pods": "20"}, 20), 65 ResourceUsage: &NodeUsage{}, 66 State: NodeState{Phase: Ready}, 67 Tasks: map[TaskID]*TaskInfo{ 68 "c1/p1": NewTaskInfo(case01Pod1), 69 "c1/p2": NewTaskInfo(case01Pod2), 70 }, 71 Others: map[string]interface{}{ 72 GPUSharingDevice: gpushare.NewGPUDevices("n1", case01Node), 73 vgpu.DeviceName: vgpu.NewGPUDevices("n1", case01Node), 74 }, 75 ImageStates: make(map[string]*k8sframework.ImageStateSummary), 76 }, 77 }, 78 { 79 name: "add 1 unknown pod and pod memory req > idle", 80 node: case02Node, 81 pods: []*v1.Pod{case02Pod1}, 82 expected: &NodeInfo{ 83 Name: "n2", 84 Node: case02Node, 85 Idle: buildResource("1000m", "-1G", map[string]string{"pods": "19"}, 20), 86 Used: buildResource("1000m", "2G", map[string]string{"pods": "1"}, 0), 87 Releasing: EmptyResource(), 88 Pipelined: EmptyResource(), 89 OversubscriptionResource: EmptyResource(), 90 Allocatable: buildResource("2000m", "1G", map[string]string{"pods": "20"}, 20), 91 Capacity: buildResource("2000m", "1G", map[string]string{"pods": "20"}, 20), 92 ResourceUsage: &NodeUsage{}, 93 State: NodeState{Phase: Ready}, 94 Tasks: map[TaskID]*TaskInfo{ 95 "c2/p1": NewTaskInfo(case02Pod1), 96 }, 97 Others: map[string]interface{}{ 98 GPUSharingDevice: gpushare.NewGPUDevices("n2", case01Node), 99 vgpu.DeviceName: vgpu.NewGPUDevices("n2", case01Node), 100 }, 101 ImageStates: make(map[string]*k8sframework.ImageStateSummary), 102 }, 103 expectedFailure: false, 104 }, 105 } 106 107 for i, test := range tests { 108 ni := NewNodeInfo(test.node) 109 for _, pod := range test.pods { 110 pi := NewTaskInfo(pod) 111 err := ni.AddTask(pi) 112 if err != nil && !test.expectedFailure { 113 t.Errorf("node info %d: \n expected success, \n but got err %v \n", i, err) 114 } 115 if err == nil && test.expectedFailure { 116 t.Errorf("node info %d: \n expected failure, \n but got success \n", i) 117 } 118 } 119 120 if !nodeInfoEqual(ni, test.expected) { 121 t.Errorf("node info %d: \n expected %v, \n got %v \n", 122 i, test.expected, ni) 123 } 124 } 125 } 126 127 func TestNodeInfo_RemovePod(t *testing.T) { 128 // case1 129 case01Node := buildNode("n1", BuildResourceList("8000m", "10G", []ScalarResource{{Name: "pods", Value: "10"}}...)) 130 case01Pod1 := buildPod("c1", "p1", "n1", v1.PodRunning, BuildResourceList("1000m", "1G"), []metav1.OwnerReference{}, make(map[string]string)) 131 case01Pod2 := buildPod("c1", "p2", "n1", v1.PodRunning, BuildResourceList("2000m", "2G"), []metav1.OwnerReference{}, make(map[string]string)) 132 case01Pod3 := buildPod("c1", "p3", "n1", v1.PodRunning, BuildResourceList("3000m", "3G"), []metav1.OwnerReference{}, make(map[string]string)) 133 134 tests := []struct { 135 name string 136 node *v1.Node 137 pods []*v1.Pod 138 rmPods []*v1.Pod 139 expected *NodeInfo 140 }{ 141 { 142 name: "add 3 running non-owner pod, remove 1 running non-owner pod", 143 node: case01Node, 144 pods: []*v1.Pod{case01Pod1, case01Pod2, case01Pod3}, 145 rmPods: []*v1.Pod{case01Pod2}, 146 expected: &NodeInfo{ 147 Name: "n1", 148 Node: case01Node, 149 Idle: buildResource("4000m", "6G", map[string]string{"pods": "8"}, 10), 150 Used: buildResource("4000m", "4G", map[string]string{"pods": "2"}, 0), 151 OversubscriptionResource: EmptyResource(), 152 Releasing: EmptyResource(), 153 Pipelined: EmptyResource(), 154 Allocatable: buildResource("8000m", "10G", map[string]string{"pods": "10"}, 10), 155 Capacity: buildResource("8000m", "10G", map[string]string{"pods": "10"}, 10), 156 ResourceUsage: &NodeUsage{}, 157 State: NodeState{Phase: Ready}, 158 Tasks: map[TaskID]*TaskInfo{ 159 "c1/p1": NewTaskInfo(case01Pod1), 160 "c1/p3": NewTaskInfo(case01Pod3), 161 }, 162 Others: map[string]interface{}{ 163 GPUSharingDevice: gpushare.NewGPUDevices("n1", case01Node), 164 vgpu.DeviceName: vgpu.NewGPUDevices("n1", case01Node), 165 }, 166 ImageStates: make(map[string]*k8sframework.ImageStateSummary), 167 }, 168 }, 169 } 170 171 for i, test := range tests { 172 ni := NewNodeInfo(test.node) 173 174 for _, pod := range test.pods { 175 pi := NewTaskInfo(pod) 176 ni.AddTask(pi) 177 } 178 179 for _, pod := range test.rmPods { 180 pi := NewTaskInfo(pod) 181 ni.RemoveTask(pi) 182 } 183 184 if !nodeInfoEqual(ni, test.expected) { 185 t.Errorf("node info %d: \n expected %v, \n got %v \n", 186 i, test.expected, ni) 187 } 188 } 189 } 190 191 func TestNodeInfo_SetNode(t *testing.T) { 192 // case1 193 case01Node1 := buildNode("n1", BuildResourceList("10", "10G", []ScalarResource{{Name: "pods", Value: "15"}}...)) 194 case01Node2 := buildNode("n1", BuildResourceList("8", "8G", []ScalarResource{{Name: "pods", Value: "10"}}...)) 195 case01Pod1 := buildPod("c1", "p1", "n1", v1.PodRunning, BuildResourceList("1", "1G"), []metav1.OwnerReference{}, make(map[string]string)) 196 case01Pod2 := buildPod("c1", "p2", "n1", v1.PodRunning, BuildResourceList("2", "2G"), []metav1.OwnerReference{}, make(map[string]string)) 197 case01Pod3 := buildPod("c1", "p3", "n1", v1.PodRunning, BuildResourceList("6", "6G"), []metav1.OwnerReference{}, make(map[string]string)) 198 199 tests := []struct { 200 name string 201 node *v1.Node 202 updated *v1.Node 203 pods []*v1.Pod 204 expected *NodeInfo 205 expected2 *NodeInfo 206 }{ 207 { 208 name: "add 3 running non-owner pod", 209 node: case01Node1, 210 updated: case01Node2, 211 pods: []*v1.Pod{case01Pod1, case01Pod2, case01Pod3}, 212 expected: &NodeInfo{ 213 Name: "n1", 214 Node: case01Node2, 215 Idle: buildResource("-1", "-1G", map[string]string{"pods": "7"}, 10), 216 Used: buildResource("9", "9G", map[string]string{"pods": "3"}, 0), 217 OversubscriptionResource: EmptyResource(), 218 Releasing: EmptyResource(), 219 Pipelined: EmptyResource(), 220 Allocatable: buildResource("8", "8G", map[string]string{"pods": "10"}, 10), 221 Capacity: buildResource("8", "8G", map[string]string{"pods": "10"}, 10), 222 ResourceUsage: &NodeUsage{}, 223 State: NodeState{Phase: Ready, Reason: ""}, 224 Tasks: map[TaskID]*TaskInfo{ 225 "c1/p1": NewTaskInfo(case01Pod1), 226 "c1/p2": NewTaskInfo(case01Pod2), 227 "c1/p3": NewTaskInfo(case01Pod3), 228 }, 229 Others: map[string]interface{}{ 230 GPUSharingDevice: gpushare.NewGPUDevices("n1", case01Node1), 231 vgpu.DeviceName: vgpu.NewGPUDevices("n1", case01Node1), 232 }, 233 ImageStates: make(map[string]*k8sframework.ImageStateSummary), 234 }, 235 expected2: &NodeInfo{ 236 Name: "n1", 237 Node: case01Node1, 238 Idle: buildResource("1", "1G", map[string]string{"pods": "12"}, 15), 239 Used: buildResource("9", "9G", map[string]string{"pods": "3"}, 0), 240 OversubscriptionResource: EmptyResource(), 241 Releasing: EmptyResource(), 242 Pipelined: EmptyResource(), 243 Allocatable: buildResource("10", "10G", map[string]string{"pods": "15"}, 15), 244 Capacity: buildResource("10", "10G", map[string]string{"pods": "15"}, 15), 245 ResourceUsage: &NodeUsage{}, 246 State: NodeState{Phase: Ready, Reason: ""}, 247 Tasks: map[TaskID]*TaskInfo{ 248 "c1/p1": NewTaskInfo(case01Pod1), 249 "c1/p2": NewTaskInfo(case01Pod2), 250 "c1/p3": NewTaskInfo(case01Pod3), 251 }, 252 Others: map[string]interface{}{ 253 GPUSharingDevice: gpushare.NewGPUDevices("n1", case01Node1), 254 vgpu.DeviceName: vgpu.NewGPUDevices("n1", case01Node1), 255 }, 256 ImageStates: make(map[string]*k8sframework.ImageStateSummary), 257 }, 258 }, 259 } 260 261 for i, test := range tests { 262 ni := NewNodeInfo(test.node) 263 for _, pod := range test.pods { 264 pi := NewTaskInfo(pod) 265 ni.AddTask(pi) 266 ni.Name = pod.Spec.NodeName 267 } 268 269 // OutOfSync. e.g.: nvidia-device-plugin is down causes gpus turn from 8 to 0 (node.status.allocatable."nvidia.com/gpu": 0) 270 ni.SetNode(test.updated) 271 if !nodeInfoEqual(ni, test.expected) { 272 t.Errorf("node info %d: \n expected\t%v, \n got\t\t%v \n", 273 i, test.expected, ni) 274 } 275 276 // Recover. e.g.: nvidia-device-plugin is restarted successfully 277 ni.SetNode(test.node) 278 if !nodeInfoEqual(ni, test.expected2) { 279 t.Errorf("recovered %d: \n expected\t%v, \n got\t\t%v \n", 280 i, test.expected2, ni) 281 } 282 } 283 }