github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/config_test.go (about) 1 // Package integration_test. 2 /* 3 * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package integration_test 6 7 import ( 8 "strconv" 9 "testing" 10 "time" 11 12 "github.com/NVIDIA/aistore/api" 13 "github.com/NVIDIA/aistore/api/apc" 14 "github.com/NVIDIA/aistore/cmn" 15 "github.com/NVIDIA/aistore/cmn/cos" 16 "github.com/NVIDIA/aistore/core/meta" 17 "github.com/NVIDIA/aistore/tools" 18 "github.com/NVIDIA/aistore/tools/tassert" 19 "github.com/NVIDIA/aistore/tools/tlog" 20 "github.com/NVIDIA/aistore/xact" 21 ) 22 23 // Note: Run these tests on both K8s and local. 24 // Minikube doesn't use TestingEnv which doesn't limit number of corner cases tested. 25 26 const errWMConfigNotExpected = "expected 'disk.disk_util_low_wm' to be %d, got: %d" 27 28 func TestConfig(t *testing.T) { 29 var ( 30 highWM = int32(80) 31 lowWM = int32(60) 32 cleanupWM = int32(55) 33 updTime = time.Second * 20 34 configRegression = map[string]string{ 35 "periodic.stats_time": updTime.String(), 36 "space.cleanupwm": strconv.Itoa(int(cleanupWM)), 37 "space.lowwm": strconv.Itoa(int(lowWM)), 38 "space.highwm": strconv.Itoa(int(highWM)), 39 "lru.enabled": "true", 40 "lru.capacity_upd_time": updTime.String(), 41 "lru.dont_evict_time": updTime.String(), 42 } 43 oconfig = tools.GetClusterConfig(t) 44 ospaceconfig = oconfig.Space 45 olruconfig = oconfig.LRU 46 operiodic = oconfig.Periodic 47 ) 48 defer tools.SetClusterConfig(t, cos.StrKVs{ 49 "periodic.stats_time": oconfig.Periodic.StatsTime.String(), 50 "space.cleanupwm": strconv.Itoa(int(oconfig.Space.CleanupWM)), 51 "space.lowwm": strconv.Itoa(int(oconfig.Space.LowWM)), 52 "space.highwm": strconv.Itoa(int(oconfig.Space.HighWM)), 53 "lru.enabled": strconv.FormatBool(oconfig.LRU.Enabled), 54 "lru.capacity_upd_time": oconfig.LRU.CapacityUpdTime.String(), 55 "lru.dont_evict_time": oconfig.LRU.DontEvictTime.String(), 56 }) 57 58 tools.SetClusterConfig(t, configRegression) 59 60 nconfig := tools.GetClusterConfig(t) 61 nlruconfig := nconfig.LRU 62 nspaceconfig := nconfig.Space 63 nperiodic := nconfig.Periodic 64 65 if v, _ := time.ParseDuration(configRegression["periodic.stats_time"]); nperiodic.StatsTime != cos.Duration(v) { 66 t.Errorf("StatsTime was not set properly: %v, should be: %v", 67 nperiodic.StatsTime, configRegression["periodic.stats_time"]) 68 } else { 69 o := operiodic.StatsTime 70 tools.SetClusterConfig(t, cos.StrKVs{"periodic.stats_time": o.String()}) 71 } 72 if v, _ := time.ParseDuration(configRegression["lru.dont_evict_time"]); nlruconfig.DontEvictTime != cos.Duration(v) { 73 t.Errorf("DontEvictTime was not set properly: %v, should be: %v", 74 nlruconfig.DontEvictTime, configRegression["lru.dont_evict_time"]) 75 } else { 76 o := olruconfig.DontEvictTime 77 tools.SetClusterConfig(t, cos.StrKVs{"lru.dont_evict_time": o.String()}) 78 } 79 80 if v, _ := time.ParseDuration(configRegression["lru.capacity_upd_time"]); nlruconfig.CapacityUpdTime != cos.Duration(v) { 81 t.Errorf("CapacityUpdTime was not set properly: %v, should be: %v", 82 nlruconfig.CapacityUpdTime, configRegression["lru.capacity_upd_time"]) 83 } else { 84 o := olruconfig.CapacityUpdTime 85 tools.SetClusterConfig(t, cos.StrKVs{"lru.capacity_upd_time": o.String()}) 86 } 87 if hw, err := strconv.Atoi(configRegression["space.highwm"]); err != nil { 88 t.Fatalf("Error parsing HighWM: %v", err) 89 } else if nspaceconfig.HighWM != int64(hw) { 90 t.Errorf("HighWatermark was not set properly: %d, should be: %d", 91 nspaceconfig.HighWM, hw) 92 } else { 93 oldhwmStr, err := cos.ConvertToString(ospaceconfig.HighWM) 94 if err != nil { 95 t.Fatalf("Error parsing HighWM: %v", err) 96 } 97 tools.SetClusterConfig(t, cos.StrKVs{"space.highwm": oldhwmStr}) 98 } 99 if lw, err := strconv.Atoi(configRegression["space.lowwm"]); err != nil { 100 t.Fatalf("Error parsing LowWM: %v", err) 101 } else if nspaceconfig.LowWM != int64(lw) { 102 t.Errorf("LowWatermark was not set properly: %d, should be: %d", 103 nspaceconfig.LowWM, lw) 104 } else { 105 oldlwmStr, err := cos.ConvertToString(ospaceconfig.LowWM) 106 if err != nil { 107 t.Fatalf("Error parsing LowWM: %v", err) 108 } 109 tools.SetClusterConfig(t, cos.StrKVs{"space.lowwm": oldlwmStr}) 110 } 111 if pt, err := cos.ParseBool(configRegression["lru.enabled"]); err != nil { 112 t.Fatalf("Error parsing lru.enabled: %v", err) 113 } else if nlruconfig.Enabled != pt { 114 t.Errorf("lru.enabled was not set properly: %v, should be %v", 115 nlruconfig.Enabled, pt) 116 } else { 117 tools.SetClusterConfig(t, cos.StrKVs{"lru.enabled": strconv.FormatBool(olruconfig.Enabled)}) 118 } 119 } 120 121 func TestConfigGet(t *testing.T) { 122 smap := tools.GetClusterMap(t, tools.GetPrimaryURL()) 123 124 proxy, err := smap.GetRandProxy(false) 125 tassert.CheckFatal(t, err) 126 tools.GetDaemonConfig(t, proxy) 127 128 target, err := smap.GetRandTarget() 129 tassert.CheckFatal(t, err) 130 tools.GetDaemonConfig(t, target) 131 } 132 133 func TestConfigSetGlobal(t *testing.T) { 134 var ( 135 ecCondition bool 136 smap = tools.GetClusterMap(t, tools.GetPrimaryURL()) 137 config = tools.GetClusterConfig(t) 138 check = func(snode *meta.Snode, c *cmn.Config) { 139 tassert.Errorf(t, c.EC.Enabled == ecCondition, 140 "%s expected 'ec.enabled' to be %v, got %v", snode, ecCondition, c.EC.Enabled) 141 } 142 ) 143 ecCondition = !config.EC.Enabled 144 toUpdate := &cmn.ConfigToSet{EC: &cmn.ECConfToSet{ 145 Enabled: apc.Ptr(ecCondition), 146 }} 147 148 tools.SetClusterConfigUsingMsg(t, toUpdate) 149 checkConfig(t, smap, check) 150 151 // Reset config 152 ecCondition = config.EC.Enabled 153 tools.SetClusterConfig(t, cos.StrKVs{ 154 "ec.enabled": strconv.FormatBool(ecCondition), 155 }) 156 checkConfig(t, smap, check) 157 158 // wait for ec 159 flt := xact.ArgsMsg{Kind: apc.ActECEncode} 160 _, _ = api.WaitForXactionIC(baseParams, &flt) 161 } 162 163 func TestConfigFailOverrideClusterOnly(t *testing.T) { 164 var ( 165 proxyURL = tools.GetPrimaryURL() 166 baseParams = tools.BaseAPIParams(proxyURL) 167 smap = tools.GetClusterMap(t, proxyURL) 168 config = tools.GetClusterConfig(t) 169 ) 170 proxy, err := smap.GetRandProxy(false /*exclude primary*/) 171 tassert.CheckFatal(t, err) 172 173 // Try overriding cluster only config on a daemon 174 err = api.SetDaemonConfig(baseParams, proxy.ID(), cos.StrKVs{"ec.enabled": strconv.FormatBool(!config.EC.Enabled)}) 175 tassert.Fatalf(t, err != nil, "expected error to occur when trying to override cluster only config") 176 177 daemonConfig := tools.GetDaemonConfig(t, proxy) 178 tassert.Errorf(t, daemonConfig.EC.Enabled == config.EC.Enabled, 179 "expected 'ec.enabled' to be %v, got: %v", config.EC.Enabled, daemonConfig.EC.Enabled) 180 181 // wait for ec 182 flt := xact.ArgsMsg{Kind: apc.ActECEncode} 183 _, _ = api.WaitForXactionIC(baseParams, &flt) 184 } 185 186 func TestConfigOverrideAndRestart(t *testing.T) { 187 tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal, MinProxies: 2}) 188 var ( 189 proxyURL = tools.GetPrimaryURL() 190 baseParams = tools.BaseAPIParams(proxyURL) 191 smap = tools.GetClusterMap(t, proxyURL) 192 config = tools.GetClusterConfig(t) 193 origProxyCnt = smap.CountActivePs() 194 origTargetCnt = smap.CountActiveTs() 195 ) 196 proxy, err := smap.GetRandProxy(true /*exclude primary*/) 197 tassert.CheckFatal(t, err) 198 199 // Override cluster config on the selected proxy 200 newLowWM := config.Disk.DiskUtilLowWM - 10 201 err = api.SetDaemonConfig(baseParams, proxy.ID(), 202 cos.StrKVs{"disk.disk_util_low_wm": strconv.FormatInt(newLowWM, 10)}) 203 tassert.CheckFatal(t, err) 204 205 daemonConfig := tools.GetDaemonConfig(t, proxy) 206 tassert.Errorf(t, daemonConfig.Disk.DiskUtilLowWM == newLowWM, 207 errWMConfigNotExpected, newLowWM, daemonConfig.Disk.DiskUtilLowWM) 208 209 // Restart and check that config persisted 210 tlog.Logf("Killing %s\n", proxy.StringEx()) 211 cmd, err := tools.KillNode(proxy) 212 tassert.CheckFatal(t, err) 213 smap, err = tools.WaitForClusterState(proxyURL, "proxy removed", smap.Version, origProxyCnt-1, origTargetCnt) 214 tassert.CheckFatal(t, err) 215 216 err = tools.RestoreNode(cmd, false, apc.Proxy) 217 tassert.CheckFatal(t, err) 218 _, err = tools.WaitForClusterState(proxyURL, "proxy restored", smap.Version, origProxyCnt, origTargetCnt) 219 tassert.CheckFatal(t, err) 220 221 daemonConfig = tools.GetDaemonConfig(t, proxy) 222 tassert.Fatalf(t, daemonConfig.Disk.DiskUtilLowWM == newLowWM, 223 errWMConfigNotExpected, newLowWM, daemonConfig.Disk.DiskUtilLowWM) 224 225 // Reset node config. 226 err = api.SetDaemonConfig(baseParams, proxy.ID(), 227 cos.StrKVs{"disk.disk_util_low_wm": strconv.FormatInt(config.Disk.DiskUtilLowWM, 10)}) 228 tassert.CheckFatal(t, err) 229 } 230 231 func TestConfigSyncToNewNode(t *testing.T) { 232 tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal, MinProxies: 2}) 233 var ( 234 proxyURL = tools.GetPrimaryURL() 235 smap = tools.GetClusterMap(t, proxyURL) 236 config = tools.GetClusterConfig(t) 237 origProxyCnt = smap.CountActivePs() 238 origTargetCnt = smap.CountActiveTs() 239 ) 240 // 1. Kill random non-primary 241 proxy, err := smap.GetRandProxy(true /*exclude primary*/) 242 tassert.CheckFatal(t, err) 243 244 tlog.Logf("Killing %s\n", proxy.StringEx()) 245 cmd, err := tools.KillNode(proxy) 246 tassert.CheckFatal(t, err) 247 248 t.Cleanup(func() { 249 tools.SetClusterConfig(t, cos.StrKVs{ 250 "ec.enabled": strconv.FormatBool(config.EC.Enabled), 251 }) 252 }) 253 254 smap, err = tools.WaitForClusterState(proxyURL, "proxy removed", smap.Version, origProxyCnt-1, origTargetCnt) 255 tassert.CheckError(t, err) 256 if err != nil || smap.Primary.ID() == proxy.ID() { 257 time.Sleep(time.Second) 258 _ = tools.RestoreNode(cmd, false, apc.Proxy) 259 time.Sleep(time.Second) 260 t.Fatalf("failed to kill %s, %s", proxy, smap.StringEx()) 261 } 262 263 // 2. After proxy is killed, update cluster configuration 264 newECEnabled := !config.EC.Enabled 265 tlog.Logf("Globally changing ec.enabled to %t (%s)\n", newECEnabled, smap.StringEx()) 266 tools.SetClusterConfig(t, cos.StrKVs{ 267 "ec.enabled": strconv.FormatBool(newECEnabled), 268 }) 269 270 // 3. Restart proxy 271 err = tools.RestoreNode(cmd, false, apc.Proxy) 272 tassert.CheckFatal(t, err) 273 _, err = tools.WaitForClusterState(proxyURL, "proxy restored", smap.Version, origProxyCnt, origTargetCnt) 274 tassert.CheckFatal(t, err) 275 276 // 4. Ensure the proxy has lastest updated config 277 daemonConfig := tools.GetDaemonConfig(t, proxy) 278 tassert.Fatalf(t, daemonConfig.EC.Enabled == newECEnabled, 279 "expected 'ec.Enabled' to be %v, got: %v", newECEnabled, daemonConfig.EC.Enabled) 280 281 // wait for ec 282 flt := xact.ArgsMsg{Kind: apc.ActECEncode} 283 _, _ = api.WaitForXactionIC(baseParams, &flt) 284 } 285 286 func checkConfig(t *testing.T, smap *meta.Smap, check func(*meta.Snode, *cmn.Config)) { 287 for _, node := range smap.Pmap { 288 config := tools.GetDaemonConfig(t, node) 289 check(node, config) 290 } 291 for _, node := range smap.Tmap { 292 config := tools.GetDaemonConfig(t, node) 293 check(node, config) 294 } 295 } 296 297 func TestConfigOverrideAndResetDaemon(t *testing.T) { 298 tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal, MinProxies: 2}) 299 var ( 300 proxyURL = tools.GetPrimaryURL() 301 baseParams = tools.BaseAPIParams(proxyURL) 302 smap = tools.GetClusterMap(t, proxyURL) 303 config = tools.GetClusterConfig(t) 304 ) 305 proxy, err := smap.GetRandProxy(true /*exclude primary*/) 306 tassert.CheckFatal(t, err) 307 308 // Override a cluster config on daemon 309 newLowWM := config.Disk.DiskUtilLowWM - 10 310 err = api.SetDaemonConfig(baseParams, proxy.ID(), 311 cos.StrKVs{"disk.disk_util_low_wm": strconv.FormatInt(newLowWM, 10)}) 312 tassert.CheckFatal(t, err) 313 314 daemonConfig := tools.GetDaemonConfig(t, proxy) 315 tassert.Errorf(t, daemonConfig.Disk.DiskUtilLowWM == newLowWM, 316 errWMConfigNotExpected, newLowWM, daemonConfig.Disk.DiskUtilLowWM) 317 318 // Reset daemon and check if the override is gone. 319 err = api.ResetDaemonConfig(baseParams, proxy.ID()) 320 tassert.CheckFatal(t, err) 321 daemonConfig = tools.GetDaemonConfig(t, proxy) 322 tassert.Fatalf(t, daemonConfig.Disk.DiskUtilLowWM == config.Disk.DiskUtilLowWM, 323 errWMConfigNotExpected, config.Disk.DiskUtilLowWM, daemonConfig.Disk.DiskUtilLowWM) 324 } 325 326 func TestConfigOverrideAndResetCluster(t *testing.T) { 327 tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal, MinProxies: 2}) 328 var ( 329 daemonConfig *cmn.Config 330 proxyURL = tools.GetPrimaryURL() 331 baseParams = tools.BaseAPIParams(proxyURL) 332 smap = tools.GetClusterMap(t, proxyURL) 333 config = tools.GetClusterConfig(t) 334 newLowWM = config.Disk.DiskUtilLowWM - 10 335 ) 336 proxy, err := smap.GetRandProxy(true /*exclude primary*/) 337 tassert.CheckFatal(t, err) 338 339 // Override a cluster config on daemon and primary 340 primary, err := tools.GetPrimaryProxy(proxyURL) 341 tassert.CheckFatal(t, err) 342 for _, node := range []*meta.Snode{primary, proxy} { 343 err = api.SetDaemonConfig(baseParams, node.ID(), 344 cos.StrKVs{"disk.disk_util_low_wm": strconv.FormatInt(newLowWM, 10)}) 345 tassert.CheckFatal(t, err) 346 347 daemonConfig = tools.GetDaemonConfig(t, node) 348 tassert.Errorf(t, daemonConfig.Disk.DiskUtilLowWM == newLowWM, 349 errWMConfigNotExpected, newLowWM, daemonConfig.Disk.DiskUtilLowWM) 350 } 351 352 // Reset all daemons and check if the override is gone. 353 err = api.ResetClusterConfig(baseParams) 354 tassert.CheckFatal(t, err) 355 for _, node := range []*meta.Snode{primary, proxy} { 356 daemonConfig = tools.GetDaemonConfig(t, node) 357 tassert.Fatalf(t, daemonConfig.Disk.DiskUtilLowWM == config.Disk.DiskUtilLowWM, 358 errWMConfigNotExpected, config.Disk.DiskUtilLowWM, daemonConfig.Disk.DiskUtilLowWM) 359 } 360 }