github.com/argoproj/argo-cd/v2@v2.10.9/controller/cache/cache_test.go (about)

     1  package cache
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"net"
     7  	"net/url"
     8  	"sync"
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/stretchr/testify/assert"
    13  	v1 "k8s.io/api/core/v1"
    14  	apierr "k8s.io/apimachinery/pkg/api/errors"
    15  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    16  	"k8s.io/apimachinery/pkg/runtime/schema"
    17  
    18  	"github.com/argoproj/gitops-engine/pkg/cache"
    19  	"github.com/argoproj/gitops-engine/pkg/cache/mocks"
    20  	"github.com/argoproj/gitops-engine/pkg/health"
    21  	"github.com/stretchr/testify/mock"
    22  	"k8s.io/client-go/kubernetes/fake"
    23  
    24  	"github.com/argoproj/argo-cd/v2/common"
    25  	"github.com/argoproj/argo-cd/v2/controller/metrics"
    26  	"github.com/argoproj/argo-cd/v2/controller/sharding"
    27  	appv1 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1"
    28  	dbmocks "github.com/argoproj/argo-cd/v2/util/db/mocks"
    29  	argosettings "github.com/argoproj/argo-cd/v2/util/settings"
    30  )
    31  
    32  type netError string
    33  
    34  func (n netError) Error() string   { return string(n) }
    35  func (n netError) Timeout() bool   { return false }
    36  func (n netError) Temporary() bool { return false }
    37  
    38  func TestHandleModEvent_HasChanges(t *testing.T) {
    39  	clusterCache := &mocks.ClusterCache{}
    40  	clusterCache.On("Invalidate", mock.Anything, mock.Anything).Return(nil).Once()
    41  	clusterCache.On("EnsureSynced").Return(nil).Once()
    42  	db := &dbmocks.ArgoDB{}
    43  	db.On("GetApplicationControllerReplicas").Return(1)
    44  	clustersCache := liveStateCache{
    45  		clusters: map[string]cache.ClusterCache{
    46  			"https://mycluster": clusterCache,
    47  		},
    48  		clusterSharding: sharding.NewClusterSharding(db, 0, 1, common.DefaultShardingAlgorithm),
    49  	}
    50  
    51  	clustersCache.handleModEvent(&appv1.Cluster{
    52  		Server: "https://mycluster",
    53  		Config: appv1.ClusterConfig{Username: "foo"},
    54  	}, &appv1.Cluster{
    55  		Server:     "https://mycluster",
    56  		Config:     appv1.ClusterConfig{Username: "bar"},
    57  		Namespaces: []string{"default"},
    58  	})
    59  }
    60  
    61  func TestHandleModEvent_ClusterExcluded(t *testing.T) {
    62  	clusterCache := &mocks.ClusterCache{}
    63  	clusterCache.On("Invalidate", mock.Anything, mock.Anything).Return(nil).Once()
    64  	clusterCache.On("EnsureSynced").Return(nil).Once()
    65  	db := &dbmocks.ArgoDB{}
    66  	db.On("GetApplicationControllerReplicas").Return(1)
    67  	clustersCache := liveStateCache{
    68  		db:          nil,
    69  		appInformer: nil,
    70  		onObjectUpdated: func(managedByApp map[string]bool, ref v1.ObjectReference) {
    71  		},
    72  		kubectl:       nil,
    73  		settingsMgr:   &argosettings.SettingsManager{},
    74  		metricsServer: &metrics.MetricsServer{},
    75  		// returns a shard that never process any cluster
    76  		clusterSharding:  sharding.NewClusterSharding(db, 0, 1, common.DefaultShardingAlgorithm),
    77  		resourceTracking: nil,
    78  		clusters:         map[string]cache.ClusterCache{"https://mycluster": clusterCache},
    79  		cacheSettings:    cacheSettings{},
    80  		lock:             sync.RWMutex{},
    81  	}
    82  
    83  	clustersCache.handleModEvent(&appv1.Cluster{
    84  		Server: "https://mycluster",
    85  		Config: appv1.ClusterConfig{Username: "foo"},
    86  	}, &appv1.Cluster{
    87  		Server:     "https://mycluster",
    88  		Config:     appv1.ClusterConfig{Username: "bar"},
    89  		Namespaces: []string{"default"},
    90  	})
    91  
    92  	assert.Len(t, clustersCache.clusters, 1)
    93  }
    94  
    95  func TestHandleModEvent_NoChanges(t *testing.T) {
    96  	clusterCache := &mocks.ClusterCache{}
    97  	clusterCache.On("Invalidate", mock.Anything).Panic("should not invalidate")
    98  	clusterCache.On("EnsureSynced").Return(nil).Panic("should not re-sync")
    99  	db := &dbmocks.ArgoDB{}
   100  	db.On("GetApplicationControllerReplicas").Return(1)
   101  	clustersCache := liveStateCache{
   102  		clusters: map[string]cache.ClusterCache{
   103  			"https://mycluster": clusterCache,
   104  		},
   105  		clusterSharding: sharding.NewClusterSharding(db, 0, 1, common.DefaultShardingAlgorithm),
   106  	}
   107  
   108  	clustersCache.handleModEvent(&appv1.Cluster{
   109  		Server: "https://mycluster",
   110  		Config: appv1.ClusterConfig{Username: "bar"},
   111  	}, &appv1.Cluster{
   112  		Server: "https://mycluster",
   113  		Config: appv1.ClusterConfig{Username: "bar"},
   114  	})
   115  }
   116  
   117  func TestHandleAddEvent_ClusterExcluded(t *testing.T) {
   118  	db := &dbmocks.ArgoDB{}
   119  	db.On("GetApplicationControllerReplicas").Return(1)
   120  	clustersCache := liveStateCache{
   121  		clusters:        map[string]cache.ClusterCache{},
   122  		clusterSharding: sharding.NewClusterSharding(db, 0, 2, common.DefaultShardingAlgorithm),
   123  	}
   124  	clustersCache.handleAddEvent(&appv1.Cluster{
   125  		Server: "https://mycluster",
   126  		Config: appv1.ClusterConfig{Username: "bar"},
   127  	})
   128  
   129  	assert.Len(t, clustersCache.clusters, 0)
   130  }
   131  
// TestHandleDeleteEvent_CacheDeadlock reproduces a lock-ordering hazard
// between the gitops-engine cluster cache lock and the liveStateCache lock:
//   - EnsureSynced takes the engine lock, then tries the liveStateCache lock;
//   - handleDeleteEvent takes the liveStateCache lock, then calls
//     cluster.Invalidate, which tries the engine lock.
// Four auxiliary mutexes choreograph the interleaving so both orderings run
// concurrently; the test fails on a 5s timeout if the goroutines deadlock.
func TestHandleDeleteEvent_CacheDeadlock(t *testing.T) {
	testCluster := &appv1.Cluster{
		Server: "https://mycluster",
		Config: appv1.ClusterConfig{Username: "bar"},
	}
	db := &dbmocks.ArgoDB{}
	db.On("GetApplicationControllerReplicas").Return(1)
	fakeClient := fake.NewSimpleClientset()
	settingsMgr := argosettings.NewSettingsManager(context.TODO(), fakeClient, "argocd")
	liveStateCacheLock := sync.RWMutex{}
	gitopsEngineClusterCache := &mocks.ClusterCache{}
	clustersCache := liveStateCache{
		clusters: map[string]cache.ClusterCache{
			testCluster.Server: gitopsEngineClusterCache,
		},
		clusterSharding: sharding.NewClusterSharding(db, 0, 1, common.DefaultShardingAlgorithm),
		settingsMgr:     settingsMgr,
		// Set the lock here so we can reference it later
		// nolint We need to overwrite here to have access to the lock
		lock: liveStateCacheLock,
	}
	// channel signals overall test completion from the orchestrating goroutine.
	channel := make(chan string)
	// Mocked lock held by the gitops-engine cluster cache
	gitopsEngineClusterCacheLock := sync.Mutex{}
	// Ensure completion of both EnsureSynced and Invalidate
	ensureSyncedCompleted := sync.Mutex{}
	invalidateCompleted := sync.Mutex{}
	// Locks to force trigger condition during test
	// Condition order:
	//   EnsuredSynced -> Locks gitops-engine
	//   handleDeleteEvent -> Locks liveStateCache
	//   EnsureSynced via sync, newResource, populateResourceInfoHandler -> attempts to Lock liveStateCache
	//   handleDeleteEvent via cluster.Invalidate -> attempts to Lock gitops-engine
	handleDeleteWasCalled := sync.Mutex{}
	engineHoldsEngineLock := sync.Mutex{}
	// Pre-lock the signaling mutexes; each is unlocked exactly once when the
	// corresponding stage has been reached, so Lock() acts as a wait.
	ensureSyncedCompleted.Lock()
	invalidateCompleted.Lock()
	handleDeleteWasCalled.Lock()
	engineHoldsEngineLock.Lock()

	gitopsEngineClusterCache.On("EnsureSynced").Run(func(args mock.Arguments) {
		gitopsEngineClusterCacheLock.Lock()
		t.Log("EnsureSynced: Engine has engine lock")
		engineHoldsEngineLock.Unlock()
		defer gitopsEngineClusterCacheLock.Unlock()
		// Wait until handleDeleteEvent holds the liveStateCache lock
		handleDeleteWasCalled.Lock()
		// Try and obtain the liveStateCache lock
		clustersCache.lock.Lock()
		t.Log("EnsureSynced: Engine has LiveStateCache lock")
		clustersCache.lock.Unlock()
		ensureSyncedCompleted.Unlock()
	}).Return(nil).Once()

	gitopsEngineClusterCache.On("Invalidate").Run(func(args mock.Arguments) {
		// Allow EnsureSynced to continue now that we're in the deadlock condition
		handleDeleteWasCalled.Unlock()
		// Wait until gitops engine holds the gitops lock
		// This prevents timing issues if we reach this point before EnsureSynced has obtained the lock
		engineHoldsEngineLock.Lock()
		t.Log("Invalidate: Engine has engine lock")
		engineHoldsEngineLock.Unlock()
		// Lock engine lock
		gitopsEngineClusterCacheLock.Lock()
		t.Log("Invalidate: Invalidate has engine lock")
		gitopsEngineClusterCacheLock.Unlock()
		invalidateCompleted.Unlock()
	}).Return()
	go func() {
		// Start the gitops-engine lock holds
		go func() {
			err := gitopsEngineClusterCache.EnsureSynced()
			if err != nil {
				assert.Fail(t, err.Error())
			}
		}()
		// Run in background
		go clustersCache.handleDeleteEvent(testCluster.Server)
		// Allow execution to continue on clusters cache call to trigger lock
		ensureSyncedCompleted.Lock()
		invalidateCompleted.Lock()
		t.Log("Competing functions were able to obtain locks")
		invalidateCompleted.Unlock()
		ensureSyncedCompleted.Unlock()
		channel <- "PASSED"
	}()
	select {
	case str := <-channel:
		assert.Equal(t, "PASSED", str, str)
	case <-time.After(5 * time.Second):
		// Neither stage completed within the window: the two lock orders
		// interleaved into a deadlock.
		assert.Fail(t, "Ended up in deadlock")
	}
}
   225  
   226  func TestIsRetryableError(t *testing.T) {
   227  	var (
   228  		tlsHandshakeTimeoutErr net.Error = netError("net/http: TLS handshake timeout")
   229  		ioTimeoutErr           net.Error = netError("i/o timeout")
   230  		connectionTimedout     net.Error = netError("connection timed out")
   231  		connectionReset        net.Error = netError("connection reset by peer")
   232  	)
   233  	t.Run("Nil", func(t *testing.T) {
   234  		assert.False(t, isRetryableError(nil))
   235  	})
   236  	t.Run("ResourceQuotaConflictErr", func(t *testing.T) {
   237  		assert.False(t, isRetryableError(apierr.NewConflict(schema.GroupResource{}, "", nil)))
   238  		assert.True(t, isRetryableError(apierr.NewConflict(schema.GroupResource{Group: "v1", Resource: "resourcequotas"}, "", nil)))
   239  	})
   240  	t.Run("ExceededQuotaErr", func(t *testing.T) {
   241  		assert.False(t, isRetryableError(apierr.NewForbidden(schema.GroupResource{}, "", nil)))
   242  		assert.True(t, isRetryableError(apierr.NewForbidden(schema.GroupResource{Group: "v1", Resource: "pods"}, "", errors.New("exceeded quota"))))
   243  	})
   244  	t.Run("TooManyRequestsDNS", func(t *testing.T) {
   245  		assert.True(t, isRetryableError(apierr.NewTooManyRequests("", 0)))
   246  	})
   247  	t.Run("DNSError", func(t *testing.T) {
   248  		assert.True(t, isRetryableError(&net.DNSError{}))
   249  	})
   250  	t.Run("OpError", func(t *testing.T) {
   251  		assert.True(t, isRetryableError(&net.OpError{}))
   252  	})
   253  	t.Run("UnknownNetworkError", func(t *testing.T) {
   254  		assert.True(t, isRetryableError(net.UnknownNetworkError("")))
   255  	})
   256  	t.Run("ConnectionClosedErr", func(t *testing.T) {
   257  		assert.False(t, isRetryableError(&url.Error{Err: errors.New("")}))
   258  		assert.True(t, isRetryableError(&url.Error{Err: errors.New("Connection closed by foreign host")}))
   259  	})
   260  	t.Run("TLSHandshakeTimeout", func(t *testing.T) {
   261  		assert.True(t, isRetryableError(tlsHandshakeTimeoutErr))
   262  	})
   263  	t.Run("IOHandshakeTimeout", func(t *testing.T) {
   264  		assert.True(t, isRetryableError(ioTimeoutErr))
   265  	})
   266  	t.Run("ConnectionTimeout", func(t *testing.T) {
   267  		assert.True(t, isRetryableError(connectionTimedout))
   268  	})
   269  	t.Run("ConnectionReset", func(t *testing.T) {
   270  		assert.True(t, isRetryableError(connectionReset))
   271  	})
   272  }
   273  
   274  func Test_asResourceNode_owner_refs(t *testing.T) {
   275  	resNode := asResourceNode(&cache.Resource{
   276  		ResourceVersion: "",
   277  		Ref: v1.ObjectReference{
   278  			APIVersion: "v1",
   279  		},
   280  		OwnerRefs: []metav1.OwnerReference{
   281  			{
   282  				APIVersion: "v1",
   283  				Kind:       "ConfigMap",
   284  				Name:       "cm-1",
   285  			},
   286  			{
   287  				APIVersion: "v1",
   288  				Kind:       "ConfigMap",
   289  				Name:       "cm-2",
   290  			},
   291  		},
   292  		CreationTimestamp: nil,
   293  		Info:              nil,
   294  		Resource:          nil,
   295  	})
   296  	expected := appv1.ResourceNode{
   297  		ResourceRef: appv1.ResourceRef{
   298  			Version: "v1",
   299  		},
   300  		ParentRefs: []appv1.ResourceRef{
   301  			{
   302  				Group: "",
   303  				Kind:  "ConfigMap",
   304  				Name:  "cm-1",
   305  			},
   306  			{
   307  				Group: "",
   308  				Kind:  "ConfigMap",
   309  				Name:  "cm-2",
   310  			},
   311  		},
   312  		Info:            nil,
   313  		NetworkingInfo:  nil,
   314  		ResourceVersion: "",
   315  		Images:          nil,
   316  		Health:          nil,
   317  		CreatedAt:       nil,
   318  	}
   319  	assert.Equal(t, expected, resNode)
   320  }
   321  
   322  func TestSkipResourceUpdate(t *testing.T) {
   323  	var (
   324  		hash1_x string = "x"
   325  		hash2_y string = "y"
   326  		hash3_x string = "x"
   327  	)
   328  	info := &ResourceInfo{
   329  		manifestHash: hash1_x,
   330  		Health: &health.HealthStatus{
   331  			Status:  health.HealthStatusHealthy,
   332  			Message: "default",
   333  		},
   334  	}
   335  	t.Run("Nil", func(t *testing.T) {
   336  		assert.False(t, skipResourceUpdate(nil, nil))
   337  	})
   338  	t.Run("From Nil", func(t *testing.T) {
   339  		assert.False(t, skipResourceUpdate(nil, info))
   340  	})
   341  	t.Run("To Nil", func(t *testing.T) {
   342  		assert.False(t, skipResourceUpdate(info, nil))
   343  	})
   344  	t.Run("No hash", func(t *testing.T) {
   345  		assert.False(t, skipResourceUpdate(&ResourceInfo{}, &ResourceInfo{}))
   346  	})
   347  	t.Run("Same hash", func(t *testing.T) {
   348  		assert.True(t, skipResourceUpdate(&ResourceInfo{
   349  			manifestHash: hash1_x,
   350  		}, &ResourceInfo{
   351  			manifestHash: hash1_x,
   352  		}))
   353  	})
   354  	t.Run("Same hash value", func(t *testing.T) {
   355  		assert.True(t, skipResourceUpdate(&ResourceInfo{
   356  			manifestHash: hash1_x,
   357  		}, &ResourceInfo{
   358  			manifestHash: hash3_x,
   359  		}))
   360  	})
   361  	t.Run("Different hash value", func(t *testing.T) {
   362  		assert.False(t, skipResourceUpdate(&ResourceInfo{
   363  			manifestHash: hash1_x,
   364  		}, &ResourceInfo{
   365  			manifestHash: hash2_y,
   366  		}))
   367  	})
   368  	t.Run("Same hash, empty health", func(t *testing.T) {
   369  		assert.True(t, skipResourceUpdate(&ResourceInfo{
   370  			manifestHash: hash1_x,
   371  			Health:       &health.HealthStatus{},
   372  		}, &ResourceInfo{
   373  			manifestHash: hash3_x,
   374  			Health:       &health.HealthStatus{},
   375  		}))
   376  	})
   377  	t.Run("Same hash, old health", func(t *testing.T) {
   378  		assert.False(t, skipResourceUpdate(&ResourceInfo{
   379  			manifestHash: hash1_x,
   380  			Health: &health.HealthStatus{
   381  				Status: health.HealthStatusHealthy},
   382  		}, &ResourceInfo{
   383  			manifestHash: hash3_x,
   384  			Health:       nil,
   385  		}))
   386  	})
   387  	t.Run("Same hash, new health", func(t *testing.T) {
   388  		assert.False(t, skipResourceUpdate(&ResourceInfo{
   389  			manifestHash: hash1_x,
   390  			Health:       &health.HealthStatus{},
   391  		}, &ResourceInfo{
   392  			manifestHash: hash3_x,
   393  			Health: &health.HealthStatus{
   394  				Status: health.HealthStatusHealthy,
   395  			},
   396  		}))
   397  	})
   398  	t.Run("Same hash, same health", func(t *testing.T) {
   399  		assert.True(t, skipResourceUpdate(&ResourceInfo{
   400  			manifestHash: hash1_x,
   401  			Health: &health.HealthStatus{
   402  				Status:  health.HealthStatusHealthy,
   403  				Message: "same",
   404  			},
   405  		}, &ResourceInfo{
   406  			manifestHash: hash3_x,
   407  			Health: &health.HealthStatus{
   408  				Status:  health.HealthStatusHealthy,
   409  				Message: "same",
   410  			},
   411  		}))
   412  	})
   413  	t.Run("Same hash, different health status", func(t *testing.T) {
   414  		assert.False(t, skipResourceUpdate(&ResourceInfo{
   415  			manifestHash: hash1_x,
   416  			Health: &health.HealthStatus{
   417  				Status:  health.HealthStatusHealthy,
   418  				Message: "same",
   419  			},
   420  		}, &ResourceInfo{
   421  			manifestHash: hash3_x,
   422  			Health: &health.HealthStatus{
   423  				Status:  health.HealthStatusDegraded,
   424  				Message: "same",
   425  			},
   426  		}))
   427  	})
   428  	t.Run("Same hash, different health message", func(t *testing.T) {
   429  		assert.True(t, skipResourceUpdate(&ResourceInfo{
   430  			manifestHash: hash1_x,
   431  			Health: &health.HealthStatus{
   432  				Status:  health.HealthStatusHealthy,
   433  				Message: "same",
   434  			},
   435  		}, &ResourceInfo{
   436  			manifestHash: hash3_x,
   437  			Health: &health.HealthStatus{
   438  				Status:  health.HealthStatusHealthy,
   439  				Message: "different",
   440  			},
   441  		}))
   442  	})
   443  }