github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/config_test.go (about)

     1  // Package integration_test.
     2  /*
     3   * Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package integration_test
     6  
     7  import (
     8  	"strconv"
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/NVIDIA/aistore/api"
    13  	"github.com/NVIDIA/aistore/api/apc"
    14  	"github.com/NVIDIA/aistore/cmn"
    15  	"github.com/NVIDIA/aistore/cmn/cos"
    16  	"github.com/NVIDIA/aistore/core/meta"
    17  	"github.com/NVIDIA/aistore/tools"
    18  	"github.com/NVIDIA/aistore/tools/tassert"
    19  	"github.com/NVIDIA/aistore/tools/tlog"
    20  	"github.com/NVIDIA/aistore/xact"
    21  )
    22  
    23  // Note: Run these tests on both K8s and local deployments.
    24  // Minikube doesn't use TestingEnv, so the number of corner cases tested is not limited there.
    25  
// errWMConfigNotExpected is the assertion message format used by the tests in
// this file when a daemon's 'disk.disk_util_low_wm' value differs from the
// expected one; its arguments are the expected value followed by the actual one.
const errWMConfigNotExpected = "expected 'disk.disk_util_low_wm' to be %d, got: %d"
    27  
    28  func TestConfig(t *testing.T) {
    29  	var (
    30  		highWM           = int32(80)
    31  		lowWM            = int32(60)
    32  		cleanupWM        = int32(55)
    33  		updTime          = time.Second * 20
    34  		configRegression = map[string]string{
    35  			"periodic.stats_time":   updTime.String(),
    36  			"space.cleanupwm":       strconv.Itoa(int(cleanupWM)),
    37  			"space.lowwm":           strconv.Itoa(int(lowWM)),
    38  			"space.highwm":          strconv.Itoa(int(highWM)),
    39  			"lru.enabled":           "true",
    40  			"lru.capacity_upd_time": updTime.String(),
    41  			"lru.dont_evict_time":   updTime.String(),
    42  		}
    43  		oconfig      = tools.GetClusterConfig(t)
    44  		ospaceconfig = oconfig.Space
    45  		olruconfig   = oconfig.LRU
    46  		operiodic    = oconfig.Periodic
    47  	)
    48  	defer tools.SetClusterConfig(t, cos.StrKVs{
    49  		"periodic.stats_time":   oconfig.Periodic.StatsTime.String(),
    50  		"space.cleanupwm":       strconv.Itoa(int(oconfig.Space.CleanupWM)),
    51  		"space.lowwm":           strconv.Itoa(int(oconfig.Space.LowWM)),
    52  		"space.highwm":          strconv.Itoa(int(oconfig.Space.HighWM)),
    53  		"lru.enabled":           strconv.FormatBool(oconfig.LRU.Enabled),
    54  		"lru.capacity_upd_time": oconfig.LRU.CapacityUpdTime.String(),
    55  		"lru.dont_evict_time":   oconfig.LRU.DontEvictTime.String(),
    56  	})
    57  
    58  	tools.SetClusterConfig(t, configRegression)
    59  
    60  	nconfig := tools.GetClusterConfig(t)
    61  	nlruconfig := nconfig.LRU
    62  	nspaceconfig := nconfig.Space
    63  	nperiodic := nconfig.Periodic
    64  
    65  	if v, _ := time.ParseDuration(configRegression["periodic.stats_time"]); nperiodic.StatsTime != cos.Duration(v) {
    66  		t.Errorf("StatsTime was not set properly: %v, should be: %v",
    67  			nperiodic.StatsTime, configRegression["periodic.stats_time"])
    68  	} else {
    69  		o := operiodic.StatsTime
    70  		tools.SetClusterConfig(t, cos.StrKVs{"periodic.stats_time": o.String()})
    71  	}
    72  	if v, _ := time.ParseDuration(configRegression["lru.dont_evict_time"]); nlruconfig.DontEvictTime != cos.Duration(v) {
    73  		t.Errorf("DontEvictTime was not set properly: %v, should be: %v",
    74  			nlruconfig.DontEvictTime, configRegression["lru.dont_evict_time"])
    75  	} else {
    76  		o := olruconfig.DontEvictTime
    77  		tools.SetClusterConfig(t, cos.StrKVs{"lru.dont_evict_time": o.String()})
    78  	}
    79  
    80  	if v, _ := time.ParseDuration(configRegression["lru.capacity_upd_time"]); nlruconfig.CapacityUpdTime != cos.Duration(v) {
    81  		t.Errorf("CapacityUpdTime was not set properly: %v, should be: %v",
    82  			nlruconfig.CapacityUpdTime, configRegression["lru.capacity_upd_time"])
    83  	} else {
    84  		o := olruconfig.CapacityUpdTime
    85  		tools.SetClusterConfig(t, cos.StrKVs{"lru.capacity_upd_time": o.String()})
    86  	}
    87  	if hw, err := strconv.Atoi(configRegression["space.highwm"]); err != nil {
    88  		t.Fatalf("Error parsing HighWM: %v", err)
    89  	} else if nspaceconfig.HighWM != int64(hw) {
    90  		t.Errorf("HighWatermark was not set properly: %d, should be: %d",
    91  			nspaceconfig.HighWM, hw)
    92  	} else {
    93  		oldhwmStr, err := cos.ConvertToString(ospaceconfig.HighWM)
    94  		if err != nil {
    95  			t.Fatalf("Error parsing HighWM: %v", err)
    96  		}
    97  		tools.SetClusterConfig(t, cos.StrKVs{"space.highwm": oldhwmStr})
    98  	}
    99  	if lw, err := strconv.Atoi(configRegression["space.lowwm"]); err != nil {
   100  		t.Fatalf("Error parsing LowWM: %v", err)
   101  	} else if nspaceconfig.LowWM != int64(lw) {
   102  		t.Errorf("LowWatermark was not set properly: %d, should be: %d",
   103  			nspaceconfig.LowWM, lw)
   104  	} else {
   105  		oldlwmStr, err := cos.ConvertToString(ospaceconfig.LowWM)
   106  		if err != nil {
   107  			t.Fatalf("Error parsing LowWM: %v", err)
   108  		}
   109  		tools.SetClusterConfig(t, cos.StrKVs{"space.lowwm": oldlwmStr})
   110  	}
   111  	if pt, err := cos.ParseBool(configRegression["lru.enabled"]); err != nil {
   112  		t.Fatalf("Error parsing lru.enabled: %v", err)
   113  	} else if nlruconfig.Enabled != pt {
   114  		t.Errorf("lru.enabled was not set properly: %v, should be %v",
   115  			nlruconfig.Enabled, pt)
   116  	} else {
   117  		tools.SetClusterConfig(t, cos.StrKVs{"lru.enabled": strconv.FormatBool(olruconfig.Enabled)})
   118  	}
   119  }
   120  
   121  func TestConfigGet(t *testing.T) {
   122  	smap := tools.GetClusterMap(t, tools.GetPrimaryURL())
   123  
   124  	proxy, err := smap.GetRandProxy(false)
   125  	tassert.CheckFatal(t, err)
   126  	tools.GetDaemonConfig(t, proxy)
   127  
   128  	target, err := smap.GetRandTarget()
   129  	tassert.CheckFatal(t, err)
   130  	tools.GetDaemonConfig(t, target)
   131  }
   132  
   133  func TestConfigSetGlobal(t *testing.T) {
   134  	var (
   135  		ecCondition bool
   136  		smap        = tools.GetClusterMap(t, tools.GetPrimaryURL())
   137  		config      = tools.GetClusterConfig(t)
   138  		check       = func(snode *meta.Snode, c *cmn.Config) {
   139  			tassert.Errorf(t, c.EC.Enabled == ecCondition,
   140  				"%s expected 'ec.enabled' to be %v, got %v", snode, ecCondition, c.EC.Enabled)
   141  		}
   142  	)
   143  	ecCondition = !config.EC.Enabled
   144  	toUpdate := &cmn.ConfigToSet{EC: &cmn.ECConfToSet{
   145  		Enabled: apc.Ptr(ecCondition),
   146  	}}
   147  
   148  	tools.SetClusterConfigUsingMsg(t, toUpdate)
   149  	checkConfig(t, smap, check)
   150  
   151  	// Reset config
   152  	ecCondition = config.EC.Enabled
   153  	tools.SetClusterConfig(t, cos.StrKVs{
   154  		"ec.enabled": strconv.FormatBool(ecCondition),
   155  	})
   156  	checkConfig(t, smap, check)
   157  
   158  	// wait for ec
   159  	flt := xact.ArgsMsg{Kind: apc.ActECEncode}
   160  	_, _ = api.WaitForXactionIC(baseParams, &flt)
   161  }
   162  
   163  func TestConfigFailOverrideClusterOnly(t *testing.T) {
   164  	var (
   165  		proxyURL   = tools.GetPrimaryURL()
   166  		baseParams = tools.BaseAPIParams(proxyURL)
   167  		smap       = tools.GetClusterMap(t, proxyURL)
   168  		config     = tools.GetClusterConfig(t)
   169  	)
   170  	proxy, err := smap.GetRandProxy(false /*exclude primary*/)
   171  	tassert.CheckFatal(t, err)
   172  
   173  	// Try overriding cluster only config on a daemon
   174  	err = api.SetDaemonConfig(baseParams, proxy.ID(), cos.StrKVs{"ec.enabled": strconv.FormatBool(!config.EC.Enabled)})
   175  	tassert.Fatalf(t, err != nil, "expected error to occur when trying to override cluster only config")
   176  
   177  	daemonConfig := tools.GetDaemonConfig(t, proxy)
   178  	tassert.Errorf(t, daemonConfig.EC.Enabled == config.EC.Enabled,
   179  		"expected 'ec.enabled' to be %v, got: %v", config.EC.Enabled, daemonConfig.EC.Enabled)
   180  
   181  	// wait for ec
   182  	flt := xact.ArgsMsg{Kind: apc.ActECEncode}
   183  	_, _ = api.WaitForXactionIC(baseParams, &flt)
   184  }
   185  
   186  func TestConfigOverrideAndRestart(t *testing.T) {
   187  	tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal, MinProxies: 2})
   188  	var (
   189  		proxyURL      = tools.GetPrimaryURL()
   190  		baseParams    = tools.BaseAPIParams(proxyURL)
   191  		smap          = tools.GetClusterMap(t, proxyURL)
   192  		config        = tools.GetClusterConfig(t)
   193  		origProxyCnt  = smap.CountActivePs()
   194  		origTargetCnt = smap.CountActiveTs()
   195  	)
   196  	proxy, err := smap.GetRandProxy(true /*exclude primary*/)
   197  	tassert.CheckFatal(t, err)
   198  
   199  	// Override cluster config on the selected proxy
   200  	newLowWM := config.Disk.DiskUtilLowWM - 10
   201  	err = api.SetDaemonConfig(baseParams, proxy.ID(),
   202  		cos.StrKVs{"disk.disk_util_low_wm": strconv.FormatInt(newLowWM, 10)})
   203  	tassert.CheckFatal(t, err)
   204  
   205  	daemonConfig := tools.GetDaemonConfig(t, proxy)
   206  	tassert.Errorf(t, daemonConfig.Disk.DiskUtilLowWM == newLowWM,
   207  		errWMConfigNotExpected, newLowWM, daemonConfig.Disk.DiskUtilLowWM)
   208  
   209  	// Restart and check that config persisted
   210  	tlog.Logf("Killing %s\n", proxy.StringEx())
   211  	cmd, err := tools.KillNode(proxy)
   212  	tassert.CheckFatal(t, err)
   213  	smap, err = tools.WaitForClusterState(proxyURL, "proxy removed", smap.Version, origProxyCnt-1, origTargetCnt)
   214  	tassert.CheckFatal(t, err)
   215  
   216  	err = tools.RestoreNode(cmd, false, apc.Proxy)
   217  	tassert.CheckFatal(t, err)
   218  	_, err = tools.WaitForClusterState(proxyURL, "proxy restored", smap.Version, origProxyCnt, origTargetCnt)
   219  	tassert.CheckFatal(t, err)
   220  
   221  	daemonConfig = tools.GetDaemonConfig(t, proxy)
   222  	tassert.Fatalf(t, daemonConfig.Disk.DiskUtilLowWM == newLowWM,
   223  		errWMConfigNotExpected, newLowWM, daemonConfig.Disk.DiskUtilLowWM)
   224  
   225  	// Reset node config.
   226  	err = api.SetDaemonConfig(baseParams, proxy.ID(),
   227  		cos.StrKVs{"disk.disk_util_low_wm": strconv.FormatInt(config.Disk.DiskUtilLowWM, 10)})
   228  	tassert.CheckFatal(t, err)
   229  }
   230  
   231  func TestConfigSyncToNewNode(t *testing.T) {
   232  	tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal, MinProxies: 2})
   233  	var (
   234  		proxyURL      = tools.GetPrimaryURL()
   235  		smap          = tools.GetClusterMap(t, proxyURL)
   236  		config        = tools.GetClusterConfig(t)
   237  		origProxyCnt  = smap.CountActivePs()
   238  		origTargetCnt = smap.CountActiveTs()
   239  	)
   240  	// 1. Kill random non-primary
   241  	proxy, err := smap.GetRandProxy(true /*exclude primary*/)
   242  	tassert.CheckFatal(t, err)
   243  
   244  	tlog.Logf("Killing %s\n", proxy.StringEx())
   245  	cmd, err := tools.KillNode(proxy)
   246  	tassert.CheckFatal(t, err)
   247  
   248  	t.Cleanup(func() {
   249  		tools.SetClusterConfig(t, cos.StrKVs{
   250  			"ec.enabled": strconv.FormatBool(config.EC.Enabled),
   251  		})
   252  	})
   253  
   254  	smap, err = tools.WaitForClusterState(proxyURL, "proxy removed", smap.Version, origProxyCnt-1, origTargetCnt)
   255  	tassert.CheckError(t, err)
   256  	if err != nil || smap.Primary.ID() == proxy.ID() {
   257  		time.Sleep(time.Second)
   258  		_ = tools.RestoreNode(cmd, false, apc.Proxy)
   259  		time.Sleep(time.Second)
   260  		t.Fatalf("failed to kill %s, %s", proxy, smap.StringEx())
   261  	}
   262  
   263  	// 2. After proxy is killed, update cluster configuration
   264  	newECEnabled := !config.EC.Enabled
   265  	tlog.Logf("Globally changing ec.enabled to %t (%s)\n", newECEnabled, smap.StringEx())
   266  	tools.SetClusterConfig(t, cos.StrKVs{
   267  		"ec.enabled": strconv.FormatBool(newECEnabled),
   268  	})
   269  
   270  	// 3. Restart proxy
   271  	err = tools.RestoreNode(cmd, false, apc.Proxy)
   272  	tassert.CheckFatal(t, err)
   273  	_, err = tools.WaitForClusterState(proxyURL, "proxy restored", smap.Version, origProxyCnt, origTargetCnt)
   274  	tassert.CheckFatal(t, err)
   275  
   276  	// 4. Ensure the proxy has lastest updated config
   277  	daemonConfig := tools.GetDaemonConfig(t, proxy)
   278  	tassert.Fatalf(t, daemonConfig.EC.Enabled == newECEnabled,
   279  		"expected 'ec.Enabled' to be %v, got: %v", newECEnabled, daemonConfig.EC.Enabled)
   280  
   281  	// wait for ec
   282  	flt := xact.ArgsMsg{Kind: apc.ActECEncode}
   283  	_, _ = api.WaitForXactionIC(baseParams, &flt)
   284  }
   285  
   286  func checkConfig(t *testing.T, smap *meta.Smap, check func(*meta.Snode, *cmn.Config)) {
   287  	for _, node := range smap.Pmap {
   288  		config := tools.GetDaemonConfig(t, node)
   289  		check(node, config)
   290  	}
   291  	for _, node := range smap.Tmap {
   292  		config := tools.GetDaemonConfig(t, node)
   293  		check(node, config)
   294  	}
   295  }
   296  
   297  func TestConfigOverrideAndResetDaemon(t *testing.T) {
   298  	tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal, MinProxies: 2})
   299  	var (
   300  		proxyURL   = tools.GetPrimaryURL()
   301  		baseParams = tools.BaseAPIParams(proxyURL)
   302  		smap       = tools.GetClusterMap(t, proxyURL)
   303  		config     = tools.GetClusterConfig(t)
   304  	)
   305  	proxy, err := smap.GetRandProxy(true /*exclude primary*/)
   306  	tassert.CheckFatal(t, err)
   307  
   308  	// Override a cluster config on daemon
   309  	newLowWM := config.Disk.DiskUtilLowWM - 10
   310  	err = api.SetDaemonConfig(baseParams, proxy.ID(),
   311  		cos.StrKVs{"disk.disk_util_low_wm": strconv.FormatInt(newLowWM, 10)})
   312  	tassert.CheckFatal(t, err)
   313  
   314  	daemonConfig := tools.GetDaemonConfig(t, proxy)
   315  	tassert.Errorf(t, daemonConfig.Disk.DiskUtilLowWM == newLowWM,
   316  		errWMConfigNotExpected, newLowWM, daemonConfig.Disk.DiskUtilLowWM)
   317  
   318  	// Reset daemon and check if the override is gone.
   319  	err = api.ResetDaemonConfig(baseParams, proxy.ID())
   320  	tassert.CheckFatal(t, err)
   321  	daemonConfig = tools.GetDaemonConfig(t, proxy)
   322  	tassert.Fatalf(t, daemonConfig.Disk.DiskUtilLowWM == config.Disk.DiskUtilLowWM,
   323  		errWMConfigNotExpected, config.Disk.DiskUtilLowWM, daemonConfig.Disk.DiskUtilLowWM)
   324  }
   325  
   326  func TestConfigOverrideAndResetCluster(t *testing.T) {
   327  	tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal, MinProxies: 2})
   328  	var (
   329  		daemonConfig *cmn.Config
   330  		proxyURL     = tools.GetPrimaryURL()
   331  		baseParams   = tools.BaseAPIParams(proxyURL)
   332  		smap         = tools.GetClusterMap(t, proxyURL)
   333  		config       = tools.GetClusterConfig(t)
   334  		newLowWM     = config.Disk.DiskUtilLowWM - 10
   335  	)
   336  	proxy, err := smap.GetRandProxy(true /*exclude primary*/)
   337  	tassert.CheckFatal(t, err)
   338  
   339  	// Override a cluster config on daemon and primary
   340  	primary, err := tools.GetPrimaryProxy(proxyURL)
   341  	tassert.CheckFatal(t, err)
   342  	for _, node := range []*meta.Snode{primary, proxy} {
   343  		err = api.SetDaemonConfig(baseParams, node.ID(),
   344  			cos.StrKVs{"disk.disk_util_low_wm": strconv.FormatInt(newLowWM, 10)})
   345  		tassert.CheckFatal(t, err)
   346  
   347  		daemonConfig = tools.GetDaemonConfig(t, node)
   348  		tassert.Errorf(t, daemonConfig.Disk.DiskUtilLowWM == newLowWM,
   349  			errWMConfigNotExpected, newLowWM, daemonConfig.Disk.DiskUtilLowWM)
   350  	}
   351  
   352  	// Reset all daemons and check if the override is gone.
   353  	err = api.ResetClusterConfig(baseParams)
   354  	tassert.CheckFatal(t, err)
   355  	for _, node := range []*meta.Snode{primary, proxy} {
   356  		daemonConfig = tools.GetDaemonConfig(t, node)
   357  		tassert.Fatalf(t, daemonConfig.Disk.DiskUtilLowWM == config.Disk.DiskUtilLowWM,
   358  			errWMConfigNotExpected, config.Disk.DiskUtilLowWM, daemonConfig.Disk.DiskUtilLowWM)
   359  	}
   360  }