github.com/percona/percona-xtradb-cluster-operator@v1.14.0/pkg/controller/pxc/replication.go (about) 1 package pxc 2 3 import ( 4 "context" 5 "fmt" 6 "strings" 7 8 "github.com/hashicorp/go-version" 9 "github.com/pkg/errors" 10 corev1 "k8s.io/api/core/v1" 11 k8serrors "k8s.io/apimachinery/pkg/api/errors" 12 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13 "k8s.io/apimachinery/pkg/labels" 14 "k8s.io/apimachinery/pkg/types" 15 "sigs.k8s.io/controller-runtime/pkg/client" 16 logf "sigs.k8s.io/controller-runtime/pkg/log" 17 18 api "github.com/percona/percona-xtradb-cluster-operator/pkg/apis/pxc/v1" 19 "github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/app/statefulset" 20 "github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/queries" 21 "github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/users" 22 ) 23 24 const replicationPodLabel = "percona.com/replicationPod" 25 26 var minReplicationVersion = version.Must(version.NewVersion("8.0.23-14.1")) 27 28 func (r *ReconcilePerconaXtraDBCluster) ensurePxcPodServices(cr *api.PerconaXtraDBCluster) error { 29 if cr.Spec.Pause { 30 return nil 31 } 32 33 isBackupRunning, err := r.isBackupRunning(cr) 34 if err != nil { 35 return errors.Wrap(err, "failed to check if backup is running") 36 } 37 38 if isBackupRunning { 39 return nil 40 } 41 42 isRestoreRunning, err := r.isRestoreRunning(cr.Name, cr.Namespace) 43 if err != nil { 44 return errors.Wrap(err, "failed to check if restore is running") 45 } 46 47 if isRestoreRunning { 48 return nil 49 } 50 51 for i := 0; i < int(cr.Spec.PXC.Size); i++ { 52 svcName := fmt.Sprintf("%s-pxc-%d", cr.Name, i) 53 svc := NewExposedPXCService(svcName, cr) 54 55 err = r.createOrUpdateService(cr, svc, len(cr.Spec.PXC.Expose.Annotations) == 0) 56 if err != nil { 57 return errors.Wrap(err, "failed to ensure pxc service") 58 } 59 } 60 return r.removeOutdatedServices(cr) 61 } 62 63 func (r *ReconcilePerconaXtraDBCluster) removeOutdatedServices(cr *api.PerconaXtraDBCluster) error { 64 // needed for labels 65 svc := NewExposedPXCService("", cr) 66 67 svcNames := make(map[string]struct{}, cr.Spec.PXC.Size) 68 for i := 0; i < int(cr.Spec.PXC.Size); i++ { 69 svcNames[fmt.Sprintf("%s-pxc-%d", cr.Name, i)] = struct{}{} 70 } 71 72 svcList := &corev1.ServiceList{} 73 err := r.client.List(context.TODO(), 74 svcList, 75 &client.ListOptions{ 76 Namespace: cr.Namespace, 77 LabelSelector: labels.SelectorFromSet(svc.Labels), 78 }, 79 ) 80 if err != nil { 81 return errors.Wrap(err, "failed to list external services") 82 } 83 84 for _, service := range svcList.Items { 85 if _, ok := svcNames[service.Name]; !ok { 86 err = r.client.Delete(context.TODO(), &service) 87 if err != nil { 88 return errors.Wrapf(err, "failed to delete service %s", service.Name) 89 } 90 } 91 } 92 return nil 93 } 94 95 func (r *ReconcilePerconaXtraDBCluster) reconcileReplication(ctx context.Context, cr *api.PerconaXtraDBCluster, replicaPassUpdated bool) error { 96 log := logf.FromContext(ctx) 97 98 if cr.Status.PXC.Ready < 1 || cr.Spec.Pause { 99 return nil 100 } 101 102 sfs := statefulset.NewNode(cr) 103 104 listRaw := corev1.PodList{} 105 err := r.client.List(context.TODO(), 106 &listRaw, 107 &client.ListOptions{ 108 Namespace: cr.Namespace, 109 LabelSelector: labels.SelectorFromSet(sfs.Labels()), 110 }, 111 ) 112 if k8serrors.IsNotFound(err) { 113 return nil 114 } else if err != nil { 115 return errors.Wrap(err, "get pod list") 116 } 117 118 // we need only running pods, because we unable to 119 // connect to failed/pending pods 120 podList := make([]corev1.Pod, 0) 121 for _, pod := range listRaw.Items { 122 if isPodReady(pod) { 123 podList = append(podList, pod) 124 } 125 } 126 127 primary, err := r.getPrimaryPod(cr) 128 if err != nil { 129 return errors.Wrap(err, "get primary pxc pod") 130 } 131 132 var primaryPod *corev1.Pod 133 for _, pod := range podList { 134 if pod.Status.PodIP == primary || pod.Name == primary || strings.HasPrefix(primary, fmt.Sprintf("%s.%s.%s", pod.Name, sfs.StatefulSet().Name, cr.Namespace)) { 135 primaryPod = &pod 136 break 137 } 138 } 139 140 if primaryPod == nil { 141 log.Info("Unable to find primary pod for replication. No pod with name or ip like this", "primary name", primary) 142 return nil 143 } 144 145 port := int32(33062) 146 147 primaryDB, err := queries.New(r.client, cr.Namespace, internalSecretsPrefix+cr.Name, users.Operator, primaryPod.Name+"."+cr.Name+"-pxc."+cr.Namespace, port, cr.Spec.PXC.ReadinessProbes.TimeoutSeconds) 148 if err != nil { 149 return errors.Wrapf(err, "failed to connect to pod %s", primaryPod.Name) 150 } 151 152 defer primaryDB.Close() 153 154 dbVer, err := primaryDB.Version() 155 if err != nil { 156 return errors.Wrap(err, "failed to get current db version") 157 } 158 159 if version.Must(version.NewVersion(dbVer)).Compare(minReplicationVersion) < 0 { 160 return nil 161 } 162 163 err = removeOutdatedChannels(ctx, primaryDB, cr.Spec.PXC.ReplicationChannels) 164 if err != nil { 165 return errors.Wrap(err, "remove outdated replication channels") 166 } 167 168 err = checkReadonlyStatus(ctx, cr.Spec.PXC.ReplicationChannels, podList, cr, r.client) 169 if err != nil { 170 return errors.Wrap(err, "failed to ensure cluster readonly status") 171 } 172 173 if len(cr.Spec.PXC.ReplicationChannels) == 0 { 174 return deleteReplicaLabels(r.client, podList) 175 } 176 177 if cr.Spec.PXC.ReplicationChannels[0].IsSource { 178 return deleteReplicaLabels(r.client, podList) 179 } 180 181 // if primary pod is not a replica, we need to make it as replica, and stop replication on other pods 182 for _, pod := range podList { 183 if pod.Name == primaryPod.Name { 184 continue 185 } 186 if _, ok := pod.Labels[replicationPodLabel]; ok { 187 db, err := queries.New(r.client, cr.Namespace, internalSecretsPrefix+cr.Name, users.Operator, pod.Name+"."+cr.Name+"-pxc."+cr.Namespace, port, cr.Spec.PXC.ReadinessProbes.TimeoutSeconds) 188 if err != nil { 189 return errors.Wrapf(err, "failed to connect to pod %s", pod.Name) 190 } 191 log.V(1).Info("Stop replication on pod", "pod", pod.Name) 192 err = db.StopAllReplication() 193 db.Close() 194 if err != nil { 195 return errors.Wrapf(err, "stop replication on pod %s", pod.Name) 196 } 197 log.V(1).Info("Remove replication label from pod", "pod", pod.Name) 198 delete(pod.Labels, replicationPodLabel) 199 err = r.client.Update(context.TODO(), &pod) 200 if err != nil { 201 return errors.Wrap(err, "failed to remove primary label from secondary pod") 202 } 203 } 204 } 205 206 if _, ok := primaryPod.Labels[replicationPodLabel]; !ok { 207 primaryPod.Labels[replicationPodLabel] = "true" 208 err = r.client.Update(context.TODO(), primaryPod) 209 if err != nil { 210 return errors.Wrap(err, "add label to main replica pod") 211 } 212 log.Info("Replication pod has changed", "new replication pod", primaryPod.Name) 213 } 214 215 sysUsersSecretObj := corev1.Secret{} 216 err = r.client.Get(context.TODO(), 217 types.NamespacedName{ 218 Namespace: cr.Namespace, 219 Name: internalSecretsPrefix + cr.Name, 220 }, 221 &sysUsersSecretObj, 222 ) 223 if err != nil { 224 return errors.Wrap(err, "get secrets") 225 } 226 227 if replicaPassUpdated { 228 err = handleReplicaPasswordChange(primaryDB, string(sysUsersSecretObj.Data[users.Replication])) 229 if err != nil { 230 return errors.Wrap(err, "failed to change replication password") 231 } 232 } 233 234 authPlugin, err := primaryDB.ReadVariable("default_authentication_plugin") 235 if err != nil { 236 return errors.Wrap(err, "failed to get default_authentication_plugin variable value") 237 } 238 239 shouldGetMasterKey := authPlugin == "caching_sha2_password" 240 241 for _, channel := range cr.Spec.PXC.ReplicationChannels { 242 if channel.IsSource { 243 continue 244 } 245 246 currConf := currentReplicaConfig(channel.Name, cr.Status.PXCReplication) 247 248 err = manageReplicationChannel(ctx, primaryDB, channel, currConf, string(sysUsersSecretObj.Data[users.Replication]), shouldGetMasterKey) 249 if err != nil { 250 return errors.Wrapf(err, "manage replication channel %s", channel.Name) 251 } 252 setReplicationChannelStatus(cr, channel) 253 } 254 255 return r.updateStatus(cr, false, nil) 256 } 257 258 func handleReplicaPasswordChange(db queries.Database, newPass string) error { 259 channels, err := db.CurrentReplicationChannels() 260 if err != nil { 261 return errors.Wrap(err, "get current replication channels") 262 } 263 264 for _, channel := range channels { 265 err := db.ChangeChannelPassword(channel, newPass) 266 if err != nil { 267 return errors.Wrapf(err, "change password for channel %s", channel) 268 } 269 } 270 return nil 271 } 272 273 func checkReadonlyStatus(ctx context.Context, channels []api.ReplicationChannel, pods []corev1.Pod, cr *api.PerconaXtraDBCluster, client client.Client) error { 274 log := logf.FromContext(ctx) 275 276 isReplica := false 277 if len(channels) > 0 { 278 isReplica = !channels[0].IsSource 279 } 280 281 for _, pod := range pods { 282 db, err := queries.New(client, cr.Namespace, internalSecretsPrefix+cr.Name, users.Operator, pod.Name+"."+cr.Name+"-pxc."+cr.Namespace, 33062, cr.Spec.PXC.ReadinessProbes.TimeoutSeconds) 283 if err != nil { 284 return errors.Wrapf(err, "connect to pod %s", pod.Name) 285 } 286 defer db.Close() 287 readonly, err := db.IsReadonly() 288 if err != nil { 289 return errors.Wrap(err, "check readonly status") 290 } 291 292 if isReplica && readonly || (!isReplica && !readonly) { 293 continue 294 } 295 296 if isReplica && !readonly { 297 log.Info("Replica is not readonly. Enabling readonly mode", "pod", pod.Name) 298 err = db.EnableReadonly() 299 } 300 301 if !isReplica && readonly { 302 log.Info("Primary is readonly. Disabling readonly mode", "pod", pod.Name) 303 err = db.DisableReadonly() 304 } 305 if err != nil { 306 return errors.Wrap(err, "enable or disable readonly mode") 307 } 308 309 } 310 return nil 311 } 312 313 func removeOutdatedChannels(ctx context.Context, db queries.Database, currentChannels []api.ReplicationChannel) error { 314 log := logf.FromContext(ctx) 315 316 dbChannels, err := db.CurrentReplicationChannels() 317 if err != nil { 318 return errors.Wrap(err, "get current replication channels") 319 } 320 321 if len(dbChannels) == 0 { 322 return nil 323 } 324 325 toRemove := make(map[string]struct{}) 326 for _, v := range dbChannels { 327 toRemove[v] = struct{}{} 328 } 329 330 for _, v := range currentChannels { 331 if !v.IsSource { 332 delete(toRemove, v.Name) 333 } 334 } 335 336 if len(toRemove) == 0 { 337 return nil 338 } 339 340 for channelToRemove := range toRemove { 341 log.Info("Remove outdated replication channel", "channel", channelToRemove) 342 err = db.StopReplication(channelToRemove) 343 if err != nil && !strings.Contains(err.Error(), "Error 3074") { // Error 3074: ER_REPLICA_CHANNEL_DOES_NOT_EXIST 344 return errors.Wrapf(err, "stop replication for channel %s", channelToRemove) 345 } 346 347 srcList, err := db.ReplicationChannelSources(channelToRemove) 348 if err != nil && err != queries.ErrNotFound { 349 return errors.Wrapf(err, "get src list for outdated channel %s", channelToRemove) 350 } 351 for _, v := range srcList { 352 log.V(1).Info("Remove outdated replication source", "channel", channelToRemove, "host", v.Host) 353 err = db.DeleteReplicationSource(channelToRemove, v.Host, v.Port) 354 if err != nil { 355 return errors.Wrapf(err, "delete replication source for outdated channel %s", channelToRemove) 356 } 357 } 358 } 359 return nil 360 } 361 362 func manageReplicationChannel(ctx context.Context, primaryDB queries.Database, channel api.ReplicationChannel, currConf api.ReplicationChannelConfig, replicaPW string, shouldGetMasterKey bool) error { 363 log := logf.FromContext(ctx) 364 currentSources, err := primaryDB.ReplicationChannelSources(channel.Name) 365 if err != nil && err != queries.ErrNotFound { 366 return errors.Wrapf(err, "get current replication sources for channel %s", channel.Name) 367 } 368 369 replicationStatus, err := primaryDB.ReplicationStatus(ctx, channel.Name) 370 if err != nil { 371 return errors.Wrap(err, "failed to check replication status") 372 } 373 374 if !isSourcesChanged(channel.SourcesList, currentSources) { 375 if replicationStatus == queries.ReplicationStatusError { 376 statusMap, err := primaryDB.ShowReplicaStatus(ctx, channel.Name) 377 if err != nil { 378 return errors.Wrap(err, "failed to get replica status") 379 } 380 log.Info("Replication for channel is not running. Please, check the replication status", "channel", channel.Name, "Last_IO_Error", statusMap["Last_IO_Error"]) 381 return nil 382 } 383 384 if replicationStatus == queries.ReplicationStatusActive && 385 *channel.Config == currConf { 386 return nil 387 } 388 } 389 390 if replicationStatus == queries.ReplicationStatusActive { 391 err = primaryDB.StopReplication(channel.Name) 392 if err != nil { 393 return errors.Wrapf(err, "stop replication for channel %s", channel.Name) 394 } 395 } 396 397 for _, src := range currentSources { 398 err = primaryDB.DeleteReplicationSource(channel.Name, src.Host, src.Port) 399 if err != nil { 400 return errors.Wrapf(err, "delete replication source for channel %s", channel.Name) 401 } 402 } 403 404 maxWeight := 0 405 maxWeightSrc := channel.SourcesList[0] 406 407 for _, src := range channel.SourcesList { 408 if src.Weight > maxWeight { 409 maxWeightSrc = src 410 } 411 err := primaryDB.AddReplicationSource(channel.Name, src.Host, src.Port, src.Weight) 412 if err != nil { 413 return errors.Wrapf(err, "add replication source for channel %s", channel.Name) 414 } 415 } 416 417 return primaryDB.StartReplication(replicaPW, queries.ReplicationConfig{ 418 Source: queries.ReplicationChannelSource{ 419 Name: channel.Name, 420 Host: maxWeightSrc.Host, 421 Port: maxWeightSrc.Port, 422 }, 423 SourceRetryCount: channel.Config.SourceRetryCount, 424 SourceConnectRetry: channel.Config.SourceConnectRetry, 425 SSL: channel.Config.SSL, 426 SSLSkipVerify: channel.Config.SSLSkipVerify, 427 CA: channel.Config.CA, 428 }, shouldGetMasterKey) 429 } 430 431 func isSourcesChanged(new []api.ReplicationSource, old []queries.ReplicationChannelSource) bool { 432 if len(new) != len(old) { 433 return true 434 } 435 436 oldSrc := make(map[string]queries.ReplicationChannelSource) 437 for _, src := range old { 438 oldSrc[src.Host] = src 439 } 440 441 for _, v := range new { 442 oldSource, ok := oldSrc[v.Host] 443 if !ok { 444 return true 445 } 446 if oldSource.Port != v.Port || oldSource.Weight != v.Weight { 447 return true 448 } 449 delete(oldSrc, v.Host) 450 } 451 452 return len(oldSrc) != 0 453 } 454 455 func deleteReplicaLabels(client client.Client, pods []corev1.Pod) error { 456 for _, pod := range pods { 457 if _, ok := pod.Labels[replicationPodLabel]; ok { 458 delete(pod.Labels, replicationPodLabel) 459 err := client.Update(context.TODO(), &pod) 460 if err != nil { 461 return errors.Wrap(err, "failed to remove replication label from pod") 462 } 463 } 464 } 465 return nil 466 } 467 468 func (r *ReconcilePerconaXtraDBCluster) removePxcPodServices(cr *api.PerconaXtraDBCluster) error { 469 if cr.Spec.Pause { 470 return nil 471 } 472 473 // needed for labels 474 svc := NewExposedPXCService("", cr) 475 476 svcList := &corev1.ServiceList{} 477 err := r.client.List(context.TODO(), 478 svcList, 479 &client.ListOptions{ 480 Namespace: cr.Namespace, 481 LabelSelector: labels.SelectorFromSet(svc.Labels), 482 }, 483 ) 484 if k8serrors.IsNotFound(err) { 485 return nil 486 } 487 488 if err != nil { 489 return errors.Wrap(err, "failed to list external services") 490 } 491 492 for _, service := range svcList.Items { 493 err = r.client.Delete(context.TODO(), &service) 494 if err != nil { 495 return errors.Wrap(err, "failed to delete external service") 496 } 497 } 498 return nil 499 } 500 501 func NewExposedPXCService(svcName string, cr *api.PerconaXtraDBCluster) *corev1.Service { 502 svc := &corev1.Service{ 503 TypeMeta: metav1.TypeMeta{ 504 APIVersion: "v1", 505 Kind: "Service", 506 }, 507 ObjectMeta: metav1.ObjectMeta{ 508 Name: svcName, 509 Namespace: cr.Namespace, 510 Labels: map[string]string{ 511 "app.kubernetes.io/name": "percona-xtradb-cluster", 512 "app.kubernetes.io/instance": cr.Name, 513 "app.kubernetes.io/component": "external-service", 514 }, 515 Annotations: cr.Spec.PXC.Expose.Annotations, 516 }, 517 Spec: corev1.ServiceSpec{ 518 Ports: []corev1.ServicePort{ 519 { 520 Port: 3306, 521 Name: "mysql", 522 }, 523 }, 524 LoadBalancerSourceRanges: cr.Spec.PXC.Expose.LoadBalancerSourceRanges, 525 Selector: map[string]string{ 526 "statefulset.kubernetes.io/pod-name": svcName, 527 }, 528 }, 529 } 530 531 if cr.Spec.PXC.Expose.Type == corev1.ServiceTypeNodePort || 532 cr.Spec.PXC.Expose.Type == corev1.ServiceTypeLoadBalancer { 533 if cr.CompareVersionWith("1.14.0") >= 0 { 534 switch cr.Spec.PXC.Expose.ExternalTrafficPolicy { 535 case corev1.ServiceExternalTrafficPolicyTypeLocal, corev1.ServiceExternalTrafficPolicyTypeCluster: 536 svc.Spec.ExternalTrafficPolicy = cr.Spec.PXC.Expose.ExternalTrafficPolicy 537 default: 538 svc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeCluster 539 } 540 } else { 541 switch cr.Spec.PXC.Expose.TrafficPolicy { 542 case corev1.ServiceExternalTrafficPolicyTypeLocal, corev1.ServiceExternalTrafficPolicyTypeCluster: 543 svc.Spec.ExternalTrafficPolicy = cr.Spec.PXC.Expose.TrafficPolicy 544 default: 545 svc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeCluster 546 } 547 } 548 } 549 550 switch cr.Spec.PXC.Expose.Type { 551 case corev1.ServiceTypeNodePort: 552 svc.Spec.Type = corev1.ServiceTypeNodePort 553 case corev1.ServiceTypeLoadBalancer: 554 svc.Spec.Type = corev1.ServiceTypeLoadBalancer 555 default: 556 svc.Spec.Type = corev1.ServiceTypeClusterIP 557 } 558 559 return svc 560 } 561 562 // isPodReady returns a boolean reflecting if a pod is in a "ready" state 563 func isPodReady(pod corev1.Pod) bool { 564 for _, condition := range pod.Status.Conditions { 565 if condition.Status != corev1.ConditionTrue { 566 continue 567 } 568 if condition.Type == corev1.PodReady { 569 return true 570 } 571 } 572 return false 573 } 574 575 func currentReplicaConfig(name string, status *api.ReplicationStatus) api.ReplicationChannelConfig { 576 res := api.ReplicationChannelConfig{} 577 if status == nil { 578 return res 579 } 580 581 for _, v := range status.Channels { 582 if v.Name == name { 583 return v.ReplicationChannelConfig 584 } 585 } 586 return res 587 } 588 589 func setReplicationChannelStatus(cr *api.PerconaXtraDBCluster, channel api.ReplicationChannel) { 590 status := api.ReplicationChannelStatus{ 591 Name: channel.Name, 592 ReplicationChannelConfig: *channel.Config, 593 } 594 595 if cr.Status.PXCReplication == nil { 596 cr.Status.PXCReplication = &api.ReplicationStatus{ 597 Channels: []api.ReplicationChannelStatus{status}, 598 } 599 return 600 } 601 602 for k, v := range cr.Status.PXCReplication.Channels { 603 if channel.Name == v.Name { 604 cr.Status.PXCReplication.Channels[k] = status 605 return 606 } 607 } 608 609 cr.Status.PXCReplication.Channels = append(cr.Status.PXCReplication.Channels, status) 610 }