go.temporal.io/server@v1.23.0/common/persistence/persistence-tests/cluster_metadata_manager.go (about) 1 // The MIT License 2 // 3 // Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. 4 // 5 // Copyright (c) 2020 Uber Technologies, Inc. 6 // 7 // Permission is hereby granted, free of charge, to any person obtaining a copy 8 // of this software and associated documentation files (the "Software"), to deal 9 // in the Software without restriction, including without limitation the rights 10 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 // copies of the Software, and to permit persons to whom the Software is 12 // furnished to do so, subject to the following conditions: 13 // 14 // The above copyright notice and this permission notice shall be included in 15 // all copies or substantial portions of the Software. 16 // 17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 // THE SOFTWARE. 24 25 package persistencetests 26 27 import ( 28 "context" 29 "net" 30 "time" 31 32 "github.com/pborman/uuid" 33 "github.com/stretchr/testify/require" 34 "go.temporal.io/api/serviceerror" 35 versionpb "go.temporal.io/api/version/v1" 36 37 persistencespb "go.temporal.io/server/api/persistence/v1" 38 "go.temporal.io/server/common/debug" 39 p "go.temporal.io/server/common/persistence" 40 "go.temporal.io/server/common/primitives" 41 ) 42 43 type ( 44 // ClusterMetadataManagerSuite runs tests that cover the ClusterMetadata read/write scenarios 45 ClusterMetadataManagerSuite struct { 46 *TestBase 47 // override suite.Suite.Assertions with require.Assertions; this means that s.NotNil(nil) will stop the test, 48 // not merely log an error 49 *require.Assertions 50 51 ctx context.Context 52 cancel context.CancelFunc 53 } 54 ) 55 56 // SetupSuite implementation 57 func (s *ClusterMetadataManagerSuite) SetupSuite() { 58 } 59 60 // SetupTest implementation 61 func (s *ClusterMetadataManagerSuite) SetupTest() { 62 // Have to define our overridden assertions in the test setup. If we did it earlier, s.T() will return nil 63 s.Assertions = require.New(s.T()) 64 s.ctx, s.cancel = context.WithTimeout(context.Background(), 30*time.Second*debug.TimeoutMultiplier) 65 } 66 67 // TearDownTest implementation 68 func (s *ClusterMetadataManagerSuite) TearDownTest() { 69 s.cancel() 70 } 71 72 // TearDownSuite implementation 73 func (s *ClusterMetadataManagerSuite) TearDownSuite() { 74 s.TearDownWorkflowStore() 75 } 76 77 // TestClusterMembershipEmptyInitially verifies the GetClusterMembers() works with an initial empty table 78 func (s *ClusterMetadataManagerSuite) TestClusterMembershipEmptyInitially() { 79 resp, err := s.ClusterMetadataManager.GetClusterMembers(s.ctx, &p.GetClusterMembersRequest{LastHeartbeatWithin: time.Minute * 10}) 80 s.Nil(err) 81 s.NotNil(resp) 82 s.Empty(resp.ActiveMembers) 83 } 84 85 // TestClusterMembershipUpsertCanRead verifies that we can UpsertClusterMembership and read our result 86 func (s *ClusterMetadataManagerSuite) TestClusterMembershipUpsertCanReadAny() { 87 req := &p.UpsertClusterMembershipRequest{ 88 HostID: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, 89 RPCAddress: net.ParseIP("127.0.0.2"), 90 RPCPort: 123, 91 Role: p.Frontend, 92 SessionStart: time.Now().UTC(), 93 RecordExpiry: time.Second, 94 } 95 96 err := s.ClusterMetadataManager.UpsertClusterMembership(s.ctx, req) 97 s.Nil(err) 98 99 resp, err := s.ClusterMetadataManager.GetClusterMembers(s.ctx, &p.GetClusterMembersRequest{}) 100 101 s.Nil(err) 102 s.NotNil(resp) 103 s.NotEmpty(resp.ActiveMembers) 104 } 105 106 // TestClusterMembershipUpsertCanRead verifies that we can UpsertClusterMembership and read our result 107 func (s *ClusterMetadataManagerSuite) TestClusterMembershipUpsertCanPageRead() { 108 // Expire previous records 109 // Todo: MetaMgr should provide api to clear all members 110 time.Sleep(time.Second * 3) 111 err := s.ClusterMetadataManager.PruneClusterMembership(s.ctx, &p.PruneClusterMembershipRequest{MaxRecordsPruned: 100}) 112 s.Nil(err) 113 114 expectedIds := make(map[string]int, 100) 115 for i := 0; i < 100; i++ { 116 hostID := primitives.NewUUID().Downcast() 117 expectedIds[primitives.UUIDString(hostID)]++ 118 req := &p.UpsertClusterMembershipRequest{ 119 HostID: hostID, 120 RPCAddress: net.ParseIP("127.0.0.2"), 121 RPCPort: 123, 122 Role: p.Frontend, 123 SessionStart: time.Now().UTC(), 124 RecordExpiry: 3 * time.Second, 125 } 126 127 err := s.ClusterMetadataManager.UpsertClusterMembership(s.ctx, req) 128 s.NoError(err) 129 } 130 131 hostCount := 0 132 var nextPageToken []byte 133 for { 134 resp, err := s.ClusterMetadataManager.GetClusterMembers(s.ctx, &p.GetClusterMembersRequest{PageSize: 9, NextPageToken: nextPageToken}) 135 s.NoError(err) 136 nextPageToken = resp.NextPageToken 137 for _, member := range resp.ActiveMembers { 138 expectedIds[primitives.UUIDString(member.HostID)]-- 139 hostCount++ 140 } 141 142 if nextPageToken == nil { 143 break 144 } 145 } 146 147 s.Equal(100, hostCount) 148 for id, val := range expectedIds { 149 s.Zero(val, "identifier was either not found in db, or shouldn't be there - "+id) 150 } 151 152 time.Sleep(time.Second * 3) 153 err = s.ClusterMetadataManager.PruneClusterMembership(s.ctx, &p.PruneClusterMembershipRequest{MaxRecordsPruned: 1000}) 154 s.NoError(err) 155 } 156 157 func (s *ClusterMetadataManagerSuite) validateUpsert(req *p.UpsertClusterMembershipRequest, resp *p.GetClusterMembersResponse, err error) { 158 s.Nil(err) 159 s.NotNil(resp) 160 s.NotEmpty(resp.ActiveMembers) 161 s.Equal(len(resp.ActiveMembers), 1) 162 // Have to round to 1 second due to SQL implementations. Cassandra truncates at 1ms. 163 s.Equal(resp.ActiveMembers[0].SessionStart.Round(time.Second), req.SessionStart.Round(time.Second)) 164 s.Equal(resp.ActiveMembers[0].RPCAddress.String(), req.RPCAddress.String()) 165 s.Equal(resp.ActiveMembers[0].RPCPort, req.RPCPort) 166 s.True(resp.ActiveMembers[0].RecordExpiry.After(time.Now().UTC())) 167 s.Equal(resp.ActiveMembers[0].HostID, req.HostID) 168 s.Equal(resp.ActiveMembers[0].Role, req.Role) 169 } 170 171 // TestClusterMembershipReadFiltersCorrectly verifies that we can UpsertClusterMembership and read our result using filters 172 func (s *ClusterMetadataManagerSuite) TestClusterMembershipReadFiltersCorrectly() { 173 now := time.Now().UTC() 174 req := &p.UpsertClusterMembershipRequest{ 175 HostID: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, 176 RPCAddress: net.ParseIP("127.0.0.2"), 177 RPCPort: 123, 178 Role: p.Frontend, 179 SessionStart: now, 180 RecordExpiry: time.Second * 4, 181 } 182 183 err := s.ClusterMetadataManager.UpsertClusterMembership(s.ctx, req) 184 s.Nil(err) 185 186 resp, err := s.ClusterMetadataManager.GetClusterMembers( 187 s.ctx, 188 &p.GetClusterMembersRequest{LastHeartbeatWithin: time.Minute * 10, HostIDEquals: req.HostID}, 189 ) 190 191 s.validateUpsert(req, resp, err) 192 193 time.Sleep(time.Second * 1) 194 resp, err = s.ClusterMetadataManager.GetClusterMembers( 195 s.ctx, 196 &p.GetClusterMembersRequest{LastHeartbeatWithin: time.Millisecond, HostIDEquals: req.HostID}, 197 ) 198 199 s.Nil(err) 200 s.NotNil(resp) 201 s.Empty(resp.ActiveMembers) 202 203 resp, err = s.ClusterMetadataManager.GetClusterMembers( 204 s.ctx, 205 &p.GetClusterMembersRequest{RoleEquals: p.Matching}, 206 ) 207 208 s.Nil(err) 209 s.NotNil(resp) 210 s.Empty(resp.ActiveMembers) 211 212 resp, err = s.ClusterMetadataManager.GetClusterMembers( 213 s.ctx, 214 &p.GetClusterMembersRequest{SessionStartedAfter: time.Now().UTC()}, 215 ) 216 217 s.Nil(err) 218 s.NotNil(resp) 219 s.Empty(resp.ActiveMembers) 220 221 resp, err = s.ClusterMetadataManager.GetClusterMembers( 222 s.ctx, 223 &p.GetClusterMembersRequest{SessionStartedAfter: now.Add(-time.Minute), RPCAddressEquals: req.RPCAddress, HostIDEquals: req.HostID}, 224 ) 225 226 s.validateUpsert(req, resp, err) 227 228 time.Sleep(time.Second * 3) 229 err = s.ClusterMetadataManager.PruneClusterMembership(s.ctx, &p.PruneClusterMembershipRequest{MaxRecordsPruned: 1000}) 230 s.NoError(err) 231 } 232 233 // TestClusterMembershipUpsertExpiresCorrectly verifies RecordExpiry functions properly for ClusterMembership records 234 func (s *ClusterMetadataManagerSuite) TestClusterMembershipUpsertExpiresCorrectly() { 235 req := &p.UpsertClusterMembershipRequest{ 236 HostID: uuid.NewUUID(), 237 RPCAddress: net.ParseIP("127.0.0.2"), 238 RPCPort: 123, 239 Role: p.Frontend, 240 SessionStart: time.Now().UTC(), 241 RecordExpiry: time.Second, 242 } 243 244 err := s.ClusterMetadataManager.UpsertClusterMembership(s.ctx, req) 245 s.NoError(err) 246 247 err = s.ClusterMetadataManager.PruneClusterMembership(s.ctx, &p.PruneClusterMembershipRequest{MaxRecordsPruned: 100}) 248 s.NoError(err) 249 250 resp, err := s.ClusterMetadataManager.GetClusterMembers( 251 s.ctx, 252 &p.GetClusterMembersRequest{LastHeartbeatWithin: time.Minute * 10, HostIDEquals: req.HostID}, 253 ) 254 255 s.NoError(err) 256 s.NotNil(resp) 257 s.NotEmpty(resp.ActiveMembers) 258 s.Equal(len(resp.ActiveMembers), 1) 259 // Have to round to 1 second due to SQL implementations. Cassandra truncates at 1ms. 260 s.Equal(resp.ActiveMembers[0].SessionStart.Round(time.Second), req.SessionStart.Round(time.Second)) 261 s.Equal(resp.ActiveMembers[0].RPCAddress.String(), req.RPCAddress.String()) 262 s.Equal(resp.ActiveMembers[0].RPCPort, req.RPCPort) 263 s.True(resp.ActiveMembers[0].RecordExpiry.After(time.Now().UTC())) 264 s.Equal(resp.ActiveMembers[0].HostID, req.HostID) 265 s.Equal(resp.ActiveMembers[0].Role, req.Role) 266 267 time.Sleep(time.Second * 2) 268 269 err = s.ClusterMetadataManager.PruneClusterMembership(s.ctx, &p.PruneClusterMembershipRequest{MaxRecordsPruned: 100}) 270 s.Nil(err) 271 272 resp, err = s.ClusterMetadataManager.GetClusterMembers( 273 s.ctx, 274 &p.GetClusterMembersRequest{LastHeartbeatWithin: time.Minute * 10}, 275 ) 276 277 s.Nil(err) 278 s.NotNil(resp) 279 s.Empty(resp.ActiveMembers) 280 } 281 282 // TestClusterMembershipUpsertInvalidExpiry verifies we cannot specify a non-positive RecordExpiry duration 283 func (s *ClusterMetadataManagerSuite) TestClusterMembershipUpsertInvalidExpiry() { 284 req := &p.UpsertClusterMembershipRequest{ 285 HostID: []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, 286 RPCAddress: net.ParseIP("127.0.0.2"), 287 RPCPort: 123, 288 Role: p.Frontend, 289 SessionStart: time.Now().UTC(), 290 RecordExpiry: time.Second * 0, 291 } 292 293 err := s.ClusterMetadataManager.UpsertClusterMembership(s.ctx, req) 294 s.NotNil(err) 295 s.IsType(err, p.ErrInvalidMembershipExpiry) 296 } 297 298 // TestInitImmutableMetadataReadWrite runs through the various cases of ClusterMetadata behavior 299 // Cases: 300 // 1 - Get, no data persisted 301 // 2 - Init, no data persisted 302 // 3 - Get, data persisted 303 // 4 - Init, data persisted 304 // 5 - Update, add version info and make sure it's persisted and can be retrieved. 305 // 6 - Delete, no data persisted 306 func (s *ClusterMetadataManagerSuite) TestInitImmutableMetadataReadWrite() { 307 clusterNameToPersist := "testing" 308 historyShardsToPersist := int32(43) 309 clusterIdToPersist := "12345" 310 clusterAddress := "cluster-address" 311 failoverVersionIncrement := int64(10) 312 initialFailoverVersion := int64(1) 313 314 // Case 1 - Get, mo data persisted 315 // Fetch the persisted values, there should be nothing on start. 316 // This doesn't error on no row found, but returns an empty record. 317 getResp, err := s.ClusterMetadataManager.GetClusterMetadata(s.ctx, &p.GetClusterMetadataRequest{ClusterName: clusterNameToPersist}) 318 319 // Validate they match our initializations 320 s.NotNil(err) 321 s.IsType(&serviceerror.NotFound{}, err) 322 s.Nil(getResp) 323 324 // Case 2 - Init, no data persisted yet 325 // First commit, this should be persisted 326 initialResp, err := s.ClusterMetadataManager.SaveClusterMetadata( 327 s.ctx, 328 &p.SaveClusterMetadataRequest{ 329 ClusterMetadata: &persistencespb.ClusterMetadata{ 330 ClusterName: clusterNameToPersist, 331 HistoryShardCount: historyShardsToPersist, 332 ClusterId: clusterIdToPersist, 333 ClusterAddress: clusterAddress, 334 FailoverVersionIncrement: failoverVersionIncrement, 335 InitialFailoverVersion: initialFailoverVersion, 336 IsGlobalNamespaceEnabled: true, 337 IsConnectionEnabled: true, 338 }}) 339 340 s.Nil(err) 341 s.True(initialResp) // request should be applied as this is first initialize 342 343 // Case 3 - Get, data persisted 344 // Fetch the persisted values 345 getResp, err = s.ClusterMetadataManager.GetClusterMetadata(s.ctx, &p.GetClusterMetadataRequest{ClusterName: clusterNameToPersist}) 346 347 // Validate they match our initializations 348 s.Nil(err) 349 s.True(getResp != nil) 350 s.Equal(clusterNameToPersist, getResp.ClusterName) 351 s.Equal(historyShardsToPersist, getResp.HistoryShardCount) 352 s.Equal(clusterIdToPersist, getResp.ClusterId) 353 s.Equal(clusterAddress, getResp.ClusterAddress) 354 s.Equal(failoverVersionIncrement, getResp.FailoverVersionIncrement) 355 s.Equal(initialFailoverVersion, getResp.InitialFailoverVersion) 356 s.True(getResp.IsGlobalNamespaceEnabled) 357 s.True(getResp.IsConnectionEnabled) 358 359 // Case 4 - Init, data persisted 360 // Attempt to overwrite with new values 361 secondResp, err := s.ClusterMetadataManager.SaveClusterMetadata(s.ctx, &p.SaveClusterMetadataRequest{ 362 ClusterMetadata: &persistencespb.ClusterMetadata{ 363 ClusterName: clusterNameToPersist, 364 HistoryShardCount: int32(77), 365 }}) 366 367 s.Nil(err) 368 s.False(secondResp) // Should not have applied, and should match values from first request 369 370 // Refetch persisted 371 getResp, err = s.ClusterMetadataManager.GetClusterMetadata(s.ctx, &p.GetClusterMetadataRequest{ClusterName: clusterNameToPersist}) 372 373 // Validate they match our initial values 374 s.Nil(err) 375 s.NotNil(getResp) 376 s.Equal(clusterNameToPersist, getResp.ClusterName) 377 s.Equal(historyShardsToPersist, getResp.HistoryShardCount) 378 s.Equal(clusterIdToPersist, getResp.ClusterId) 379 s.Equal(clusterAddress, getResp.ClusterAddress) 380 s.Equal(failoverVersionIncrement, getResp.FailoverVersionIncrement) 381 s.Equal(initialFailoverVersion, getResp.InitialFailoverVersion) 382 s.True(getResp.IsGlobalNamespaceEnabled) 383 s.True(getResp.IsConnectionEnabled) 384 385 // Case 5 - Update version info 386 getResp.VersionInfo = &versionpb.VersionInfo{ 387 Current: &versionpb.ReleaseInfo{ 388 Version: "1.0", 389 }, 390 } 391 thirdResp, err := s.ClusterMetadataManager.SaveClusterMetadata(s.ctx, &p.SaveClusterMetadataRequest{ 392 ClusterMetadata: getResp.ClusterMetadata, 393 Version: getResp.Version, 394 }) 395 s.Nil(err) 396 s.True(thirdResp) 397 getResp, err = s.ClusterMetadataManager.GetClusterMetadata(s.ctx, &p.GetClusterMetadataRequest{ClusterName: clusterNameToPersist}) 398 s.Nil(err) 399 s.NotNil(getResp) 400 s.Equal("1.0", getResp.ClusterMetadata.VersionInfo.Current.Version) 401 402 // Case 6 - Delete Cluster Metadata 403 err = s.ClusterMetadataManager.DeleteClusterMetadata(s.ctx, &p.DeleteClusterMetadataRequest{ClusterName: clusterNameToPersist}) 404 s.Nil(err) 405 getResp, err = s.ClusterMetadataManager.GetClusterMetadata(s.ctx, &p.GetClusterMetadataRequest{ClusterName: clusterNameToPersist}) 406 407 // Validate they match our initializations 408 s.NotNil(err) 409 s.IsType(&serviceerror.NotFound{}, err) 410 s.Nil(getResp) 411 412 // Case 7 - Update current cluster metadata 413 clusterNameToPersist = "active" 414 initialResp, err = s.ClusterMetadataManager.SaveClusterMetadata( 415 s.ctx, 416 &p.SaveClusterMetadataRequest{ 417 ClusterMetadata: &persistencespb.ClusterMetadata{ 418 ClusterName: clusterNameToPersist, 419 HistoryShardCount: historyShardsToPersist, 420 ClusterId: clusterIdToPersist, 421 ClusterAddress: clusterAddress, 422 FailoverVersionIncrement: failoverVersionIncrement, 423 InitialFailoverVersion: initialFailoverVersion, 424 IsGlobalNamespaceEnabled: true, 425 IsConnectionEnabled: true, 426 }}) 427 s.Nil(err) 428 s.True(initialResp) 429 430 // Case 8 - Get, data persisted 431 // Fetch the persisted values 432 getResp, err = s.ClusterMetadataManager.GetClusterMetadata(s.ctx, &p.GetClusterMetadataRequest{ClusterName: clusterNameToPersist}) 433 434 // Validate they match our initializations 435 s.Nil(err) 436 s.True(getResp != nil) 437 s.Equal(clusterNameToPersist, getResp.ClusterName) 438 s.Equal(historyShardsToPersist, getResp.HistoryShardCount) 439 s.Equal(clusterIdToPersist, getResp.ClusterId) 440 s.Equal(clusterAddress, getResp.ClusterAddress) 441 s.Equal(failoverVersionIncrement, getResp.FailoverVersionIncrement) 442 s.Equal(initialFailoverVersion, getResp.InitialFailoverVersion) 443 s.True(getResp.IsGlobalNamespaceEnabled) 444 s.True(getResp.IsConnectionEnabled) 445 446 // Case 9 - Update current cluster metadata 447 getResp.VersionInfo = &versionpb.VersionInfo{ 448 Current: &versionpb.ReleaseInfo{ 449 Version: "2.0", 450 }, 451 } 452 applied, err := s.ClusterMetadataManager.SaveClusterMetadata(s.ctx, &p.SaveClusterMetadataRequest{ 453 ClusterMetadata: getResp.ClusterMetadata, 454 Version: getResp.Version, 455 }) 456 s.True(applied) 457 s.NoError(err) 458 459 // Case 10 - Get, data persisted 460 // Fetch the persisted values 461 getResp, err = s.ClusterMetadataManager.GetClusterMetadata(s.ctx, &p.GetClusterMetadataRequest{ClusterName: clusterNameToPersist}) 462 s.NoError(err) 463 s.Equal("2.0", getResp.ClusterMetadata.VersionInfo.Current.Version) 464 465 // Case 11 - List 466 _, err = s.ClusterMetadataManager.SaveClusterMetadata( 467 s.ctx, 468 &p.SaveClusterMetadataRequest{ 469 ClusterMetadata: &persistencespb.ClusterMetadata{ 470 ClusterName: clusterNameToPersist + "2", 471 HistoryShardCount: historyShardsToPersist, 472 ClusterId: clusterIdToPersist, 473 ClusterAddress: clusterAddress, 474 FailoverVersionIncrement: failoverVersionIncrement, 475 InitialFailoverVersion: initialFailoverVersion, 476 IsGlobalNamespaceEnabled: true, 477 IsConnectionEnabled: true, 478 }}) 479 s.NoError(err) 480 481 resp, err := s.ClusterMetadataManager.ListClusterMetadata(s.ctx, &p.ListClusterMetadataRequest{PageSize: 1}) 482 s.NoError(err) 483 s.Equal(1, len(resp.ClusterMetadata)) 484 resp, err = s.ClusterMetadataManager.ListClusterMetadata(s.ctx, &p.ListClusterMetadataRequest{PageSize: 1, NextPageToken: resp.NextPageToken}) 485 s.NoError(err) 486 s.Equal(1, len(resp.ClusterMetadata)) 487 }