github.com/IBM-Blockchain/fabric-operator@v1.0.4/pkg/action/upgradedbs.go

/*
 * Copyright contributors to the Hyperledger Fabric Operator project
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package action

import (
	"context"
	"fmt"
	"path/filepath"
	"time"

	"github.com/pkg/errors"

	current "github.com/IBM-Blockchain/fabric-operator/api/v1beta1"
	oconfig "github.com/IBM-Blockchain/fabric-operator/operatorconfig"
	"github.com/IBM-Blockchain/fabric-operator/pkg/initializer/common/config"
	controller "github.com/IBM-Blockchain/fabric-operator/pkg/k8s/controllerclient"
	"github.com/IBM-Blockchain/fabric-operator/pkg/manager/resources/container"
	"github.com/IBM-Blockchain/fabric-operator/pkg/manager/resources/deployment"
	jobv1 "github.com/IBM-Blockchain/fabric-operator/pkg/manager/resources/job"
	"github.com/IBM-Blockchain/fabric-operator/pkg/util"
	"github.com/IBM-Blockchain/fabric-operator/pkg/util/image"

	appsv1 "k8s.io/api/apps/v1"
	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/util/wait"

	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
)

//go:generate counterfeiter -o mocks/deploymentreset.go -fake-name DeploymentReset . DeploymentReset

// DeploymentReset defines the contract to manage the deployment resource
type DeploymentReset interface {
	Get(metav1.Object) (k8sclient.Object, error)
	DeploymentStatus(metav1.Object) (appsv1.DeploymentStatus, error)
	GetScheme() *runtime.Scheme
}

//go:generate counterfeiter -o mocks/upgradeinstance.go -fake-name UpgradeInstance . UpgradeInstance

// UpgradeInstance defines the contract to update the instance database
type UpgradeInstance interface {
	runtime.Object
	metav1.Object
	UsingCouchDB() bool
	UsingHSMProxy() bool
	IsHSMEnabled() bool
}
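
// A minimal sketch of how a reconciler might drive the migration below. The
// manager, client, and timeout values are illustrative stand-ins; timeouts
// carries the ReplicaChange, PodDeletion, PodStart, JobStart, and
// JobCompletion durations used throughout this file:
//
//	var manager DeploymentReset // e.g. a deployment manager, or a mock in tests
//	var client controller.Client
//	var timeouts oconfig.DBMigrationTimeouts
//
//	peer := &current.IBPPeer{} // must be a *current.IBPPeer; UpgradeDBs type-asserts it
//	if err := UpgradeDBs(manager, client, peer, timeouts); err != nil {
//		// surface the error to the reconcile loop
//	}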

// UpgradeDBs updates the database and performs all necessary cleanup and restart logic
func UpgradeDBs(deploymentManager DeploymentReset, client controller.Client, instance UpgradeInstance, timeouts oconfig.DBMigrationTimeouts) error {
	obj, err := deploymentManager.Get(instance)
	if err != nil {
		return errors.Wrap(err, "failed to get deployment")
	}

	dep := deployment.New(obj.(*appsv1.Deployment))
	originalReplicas := dep.Spec.Replicas

	// Replicas must be set to 0, otherwise the migration job won't be able to
	// start due to the volume still being attached to another node.
	//
	// Wait for the deployment to be marked as unavailable after the replica
	// count is updated to 0.
	if err := setReplicaCountAndWait(client, deploymentManager, instance, int32(0), timeouts.ReplicaChange.Get()); err != nil {
		return errors.Wrapf(err, "failed to update deployment for '%s'", instance.GetName())
	}

	if err := waitForPodToDelete(client, instance, timeouts.PodDeletion.Get()); err != nil {
		return err
	}

	var ip string
	if instance.UsingCouchDB() {
		couchDBPod := getCouchDBPod(dep)
		if err := startCouchDBPod(client, couchDBPod); err != nil {
			return err
		}

		ip, err = waitForPodToBeRunning(client, couchDBPod, timeouts.PodStart.Get())
		if err != nil {
			return errors.Wrap(err, "couchdb pod failed to start")
		}
	}

	var hsmConfig *config.HSMConfig
	if !instance.UsingHSMProxy() && instance.IsHSMEnabled() {
		hsmConfig, err = config.ReadHSMConfig(client, instance)
		if err != nil {
			return err
		}
	}

	job := peerDBMigrationJob(dep, instance.(*current.IBPPeer), hsmConfig, ip, timeouts)
	createOpt := controller.CreateOption{
		Owner:  instance,
		Scheme: deploymentManager.GetScheme(),
	}
	if err := StartJob(client, job.Job, createOpt); err != nil {
		if instance.UsingCouchDB() {
			log.Info("failed to start db migration job, deleting couchdb pod")
			couchDBPod := &corev1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Name:      fmt.Sprintf("%s-couchdb", instance.GetName()),
					Namespace: instance.GetNamespace(),
				},
			}

			if err := client.Delete(context.TODO(), couchDBPod); err != nil {
				return errors.Wrap(err, "failed to delete couchdb pod")
			}
		}
		return errors.Wrap(err, "failed to start db migration job")
	}
	log.Info(fmt.Sprintf("Job '%s' created", job.GetName()))

	// Wait for the job to start and its pod to reach the running state before
	// reverting back to the original replica value
	if err := job.WaitUntilActive(client); err != nil {
		return err
	}
	log.Info(fmt.Sprintf("Job '%s' active", job.GetName()))

	if err := job.WaitUntilContainerFinished(client, "dbmigration"); err != nil {
		return err
	}
	log.Info(fmt.Sprintf("Job '%s' finished", job.GetName()))

	// Wait for the deployment to be marked as available after the replica update
	if err := setReplicaCountAndWait(client, deploymentManager, instance, *originalReplicas, timeouts.ReplicaChange.Get()); err != nil {
		return errors.Wrapf(err, "failed to update deployment for '%s'", instance.GetName())
	}

	return nil
}
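
// The migration job above is created with the custom resource as its owner
// (via controller.CreateOption), which is how owned resources are typically
// garbage-collected once the parent resource is deleted. A minimal sketch of
// the same pattern for any owned object (obj and scheme are illustrative):
//
//	opt := controller.CreateOption{
//		Owner:  instance,
//		Scheme: scheme,
//	}
//	if err := client.Create(context.TODO(), obj, opt); err != nil {
//		// handle create failure
//	}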

// StartJob uses the client to create a job on the Kubernetes cluster
func StartJob(client controller.Client, job *batchv1.Job, opt controller.CreateOption) error {
	log.Info(fmt.Sprintf("Starting job '%s'", job.GetName()))

	if err := client.Create(context.TODO(), job, opt); err != nil {
		return errors.Wrap(err, "failed to create migration job")
	}

	return nil
}

func startCouchDBPod(client controller.Client, pod *corev1.Pod) error {
	log.Info(fmt.Sprintf("Starting couchdb pod '%s'", pod.GetName()))

	if err := client.Create(context.TODO(), pod); err != nil {
		return errors.Wrap(err, "failed to create couchdb pod")
	}

	return nil
}

func getCouchDBPod(dep *deployment.Deployment) *corev1.Pod {
	couchdb := dep.MustGetContainer("couchdb")

	localSpecCopy := dep.Spec.Template.Spec.DeepCopy()
	volumes := localSpecCopy.Volumes
	// Remove the ledgerdb volume from the couchdb pod by swapping in the last
	// element and truncating the slice
	for i, volume := range volumes {
		if volume.Name == "fabric-peer-0" {
			volumes[i] = volumes[len(volumes)-1]
			volumes = volumes[:len(volumes)-1]
			break
		}
	}
	return &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("%s-couchdb", dep.GetName()),
			Namespace: dep.GetNamespace(),
			Labels: map[string]string{
				"app": dep.Name,
			},
		},
		Spec: corev1.PodSpec{
			ImagePullSecrets: dep.Spec.Template.Spec.ImagePullSecrets,
			RestartPolicy:    corev1.RestartPolicyNever,
			Containers: []corev1.Container{
				*couchdb.Container,
			},
			Volumes: volumes,
		},
	}
}
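
// The wait helpers below all follow the same wait.Poll contract: the
// condition function returns (true, nil) to stop successfully, (false, nil)
// to poll again until the timeout elapses, and (false, err) to abort early.
// A stripped-down sketch of the pattern (checkCondition is a hypothetical
// stand-in):
//
//	err := wait.Poll(2*time.Second, timeout, func() (bool, error) {
//		done, err := checkCondition()
//		if err != nil {
//			return false, err // abort polling immediately
//		}
//		return done, nil // true stops polling, false retries
//	})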

func waitForPodToDelete(client controller.Client, instance metav1.Object, timeout time.Duration) error {
	err := wait.Poll(2*time.Second, timeout, func() (bool, error) {
		log.Info(fmt.Sprintf("Waiting for pod for deployment '%s' to be deleted", instance.GetName()))

		labelSelector, err := labels.Parse(fmt.Sprintf("app=%s", instance.GetName()))
		if err != nil {
			return false, nil
		}

		opts := &k8sclient.ListOptions{
			LabelSelector: labelSelector,
		}

		pods := &corev1.PodList{}
		if err := client.List(context.TODO(), pods, opts); err != nil {
			return false, nil
		}

		if len(pods.Items) == 0 {
			return true, nil
		}

		return false, nil
	})
	if err != nil {
		return errors.Wrapf(err, "failed to delete pod associated with '%s'", instance.GetName())
	}
	return nil
}

func waitForPodToBeRunning(client controller.Client, pod *corev1.Pod, timeout time.Duration) (string, error) {
	var podIP string
	p := &corev1.Pod{}

	err := wait.Poll(2*time.Second, timeout, func() (bool, error) {
		log.Info(fmt.Sprintf("Waiting for couchdb pod '%s' to be running", pod.GetName()))

		label := fmt.Sprintf("app=%s", pod.Labels["app"])
		labelSelector, err := labels.Parse(label)
		if err != nil {
			return false, err
		}

		opts := &k8sclient.ListOptions{
			LabelSelector: labelSelector,
		}

		pods := &corev1.PodList{}
		if err := client.List(context.TODO(), pods, opts); err != nil {
			return false, err
		}

		if len(pods.Items) != 1 {
			return false, nil
		}

		p = &pods.Items[0]
		if len(p.Status.ContainerStatuses) > 0 && p.Status.ContainerStatuses[0].State.Running != nil {
			if p.Status.ContainerStatuses[0].Ready {
				return true, nil
			}
		}

		return false, nil
	})
	if err != nil {
		return podIP, errors.Wrapf(err, "pod '%s' not running", pod.GetName())
	}

	if p != nil {
		podIP = p.Status.PodIP
	}

	return podIP, nil
}

func setReplicaCountAndWait(client controller.Client, deploymentManager DeploymentReset, instance metav1.Object, count int32, timeout time.Duration) error {
	obj, err := deploymentManager.Get(instance)
	if err != nil {
		return errors.Wrap(err, "failed to get deployment")
	}
	dep := deployment.New(obj.DeepCopyObject().(*appsv1.Deployment))

	if err := setReplicaCountOnDeployment(client, obj, dep, count); err != nil {
		return err
	}

	err = wait.Poll(2*time.Second, timeout, func() (bool, error) {
		log.Info(fmt.Sprintf("Waiting for deployment '%s' replicas to go to %d", dep.GetName(), count))
		status, err := deploymentManager.DeploymentStatus(instance)
		if err == nil {
			if status.Replicas == count {
				return true, nil
			}
		}
		return false, nil
	})
	if err != nil {
		return errors.Wrap(err, "failed to determine if deployment is available")
	}

	return nil
}

func setReplicaCountOnDeployment(client controller.Client, obj k8sclient.Object, dep *deployment.Deployment, count int32) error {
	dep.Deployment.Spec.Replicas = &count
	if err := client.Patch(context.TODO(), dep.Deployment, k8sclient.MergeFrom(obj)); err != nil {
		return errors.Wrapf(err, "failed to update replica to %d", count)
	}
	return nil
}

// peerDBMigrationJob builds the database migration job from a copy of the
// peer container that is passed in, updated with the migration command
func peerDBMigrationJob(dep *deployment.Deployment, instance *current.IBPPeer, hsmConfig *config.HSMConfig, couchdbIP string, timeouts oconfig.DBMigrationTimeouts) *jobv1.Job {
	cont := dep.MustGetContainer("peer")
	envs := []string{
		"LICENSE",
		"FABRIC_CFG_PATH",
		"CORE_PEER_MSPCONFIGPATH",
		"CORE_PEER_FILESYSTEMPATH",
		"CORE_PEER_TLS_ENABLED",
		"CORE_PEER_TLS_CERT_FILE",
		"CORE_PEER_TLS_KEY_FILE",
		"CORE_PEER_TLS_ROOTCERT_FILE",
		"CORE_PEER_LOCALMSPID",
		"CORE_LEDGER_STATE_COUCHDBCONFIG_USERNAME",
		"CORE_LEDGER_STATE_COUCHDBCONFIG_PASSWORD",
		"CORE_LEDGER_STATE_STATEDATABASE",
	}

	backoffLimit := int32(0)
	envVars := cont.GetEnvs(envs)
	envVars = append(envVars,
		corev1.EnvVar{
			Name:  "FABRIC_LOGGING_SPEC",
			Value: "debug",
		},
	)

	if couchdbIP != "" {
		envVars = append(envVars,
			corev1.EnvVar{
				Name:  "CORE_LEDGER_STATE_COUCHDBCONFIG_COUCHDBADDRESS",
				Value: fmt.Sprintf("%s:5984", couchdbIP),
			},
		)
	}

	command := `echo "Migrating peer's database" && peer node upgrade-dbs && mkdir -p /data/status && ts=$(date +%Y%m%d-%H%M%S) && touch /data/status/migrated_to_v2-$ts`

	if instance.UsingHSMProxy() {
		envVars = append(envVars,
			corev1.EnvVar{
				Name:  "PKCS11_PROXY_SOCKET",
				Value: instance.Spec.HSM.PKCS11Endpoint,
			},
		)
	}

	localSpecCopy := dep.Spec.Template.Spec.DeepCopy()
	volumes := localSpecCopy.Volumes

	if instance.UsingCouchDB() {
		// Remove the statedb volume from the migration pod; the state data
		// lives in the standalone couchdb pod instead
		for i, volume := range volumes {
			if volume.Name == "db-data" {
				volumes[i] = volumes[len(volumes)-1]
				volumes = volumes[:len(volumes)-1]
				break
			}
		}
	}

	k8sJob := &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("%s-dbmigration", instance.GetName()),
			Namespace: dep.GetNamespace(),
			Labels: map[string]string{
				"job-name": fmt.Sprintf("%s-dbmigration", instance.GetName()),
				"owner":    instance.GetName(),
			},
		},
		Spec: batchv1.JobSpec{
			BackoffLimit: &backoffLimit,
			Template: corev1.PodTemplateSpec{
				Spec: corev1.PodSpec{
					ImagePullSecrets: dep.Spec.Template.Spec.ImagePullSecrets,
					RestartPolicy:    corev1.RestartPolicyNever,
					Containers: []corev1.Container{
						{
							Name:            "dbmigration",
							Image:           image.Format(instance.Spec.Images.PeerImage, instance.Spec.Images.PeerTag),
							ImagePullPolicy: cont.ImagePullPolicy,
							Command: []string{
								"sh",
								"-c",
								command,
							},
							Env:             envVars,
							Resources:       cont.Resources,
							SecurityContext: cont.SecurityContext,
							VolumeMounts:    cont.VolumeMounts,
						},
					},
					Volumes: volumes,
				},
			},
		},
	}

	job := jobv1.New(k8sJob, &jobv1.Timeouts{
		WaitUntilActive:   timeouts.JobStart.Get(),
		WaitUntilFinished: timeouts.JobCompletion.Get(),
	})

	if hsmConfig != nil {
		migrationCont := job.MustGetContainer("dbmigration")
		migrationCont.Env = append(migrationCont.Env, hsmConfig.Envs...)

		volume := corev1.Volume{
			Name: "shared",
			VolumeSource: corev1.VolumeSource{
				EmptyDir: &corev1.EmptyDirVolumeSource{
					Medium: corev1.StorageMediumMemory,
				},
			},
		}
		job.Spec.Template.Spec.Volumes = util.AppendVolumeIfMissing(job.Spec.Template.Spec.Volumes, volume)

		initCont := HSMInitContainer(instance, hsmConfig)
		job.Spec.Template.Spec.InitContainers = append(job.Spec.Template.Spec.InitContainers, *initCont.Container)

		if hsmConfig.Daemon != nil {
			// Unable to launch the daemon if not running in privileged mode
			t := true
			migrationCont.SecurityContext.Privileged = &t
			migrationCont.SecurityContext.AllowPrivilegeEscalation = &t

			// This is the shared volume where the file 'pkcsslotd-launched' is
			// touched to let other containers know that the daemon has
			// successfully launched.
			migrationCont.AppendVolumeMountIfMissing("shared", "/shared")

			// Update the command to ensure that the daemon is running before
			// starting the migration
			migrationCont.Command = []string{
				"sh",
				"-c",
				fmt.Sprintf("%s && %s", config.DAEMON_CHECK_CMD, command),
			}

			var pvcMount *corev1.VolumeMount
			for _, vm := range hsmConfig.MountPaths {
				if vm.UsePVC {
					pvcMount = &corev1.VolumeMount{
						Name:      "fabric-peer-0",
						MountPath: vm.MountPath,
					}
				}
			}

			// Add the daemon container to the job
			config.AddDaemonContainer(hsmConfig, job, instance.GetResource(current.HSMDAEMON), pvcMount)

			// If a PVC mount has been configured in the HSM config, set the
			// volume mount on the migration container
			if pvcMount != nil {
				migrationCont.AppendVolumeMountIfMissing(pvcMount.Name, pvcMount.MountPath)
			}
		}
	}

	return job
}
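
// When an HSM daemon is configured, the migration job's pod ends up with
// cooperating containers wired together through the in-memory "shared"
// emptyDir volume. Roughly (an outline, not literal output; the daemon
// container's name comes from the HSM config):
//
//	initContainers:
//	- hsm-client    # copies the PKCS#11 library into /shared/hsm
//	containers:
//	- dbmigration   # blocks on DAEMON_CHECK_CMD, then runs the migration
//	- <daemon>      # added by config.AddDaemonContainer
//	volumes:
//	- shared        # emptyDir, medium: Memory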

// HSMInitContainer creates a container that copies the HSM library to the
// shared volume
func HSMInitContainer(instance *current.IBPPeer, hsmConfig *config.HSMConfig) *container.Container {
	hsmLibraryPath := hsmConfig.Library.FilePath
	hsmLibraryName := filepath.Base(hsmLibraryPath)

	f := false
	user := int64(0)
	mountPath := "/shared"
	initCont := &container.Container{
		Container: &corev1.Container{
			Name:            "hsm-client",
			Image:           image.Format(instance.Spec.Images.HSMImage, instance.Spec.Images.HSMTag),
			ImagePullPolicy: corev1.PullAlways,
			Command: []string{
				"sh",
				"-c",
				fmt.Sprintf("mkdir -p %s/hsm && dst=\"%s/hsm/%s\" && echo \"Copying %s to ${dst}\" && mkdir -p $(dirname $dst) && cp -r %s $dst", mountPath, mountPath, hsmLibraryName, hsmLibraryPath, hsmLibraryPath),
			},
			SecurityContext: &corev1.SecurityContext{
				RunAsUser:    &user,
				RunAsNonRoot: &f,
			},
			VolumeMounts: []corev1.VolumeMount{
				{
					Name:      "shared",
					MountPath: mountPath,
				},
			},
			Resources: corev1.ResourceRequirements{
				Requests: corev1.ResourceList{
					corev1.ResourceCPU:    resource.MustParse("0.1"),
					corev1.ResourceMemory: resource.MustParse("100Mi"),
				},
				Limits: corev1.ResourceList{
					corev1.ResourceCPU:    resource.MustParse("1"),
					corev1.ResourceMemory: resource.MustParse("500Mi"),
				},
			},
		},
	}

	return initCont
}
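
// For an HSM library configured at /usr/local/lib/libpkcs11.so (an
// illustrative path), the init container above effectively runs:
//
//	mkdir -p /shared/hsm && dst="/shared/hsm/libpkcs11.so" && \
//	  echo "Copying /usr/local/lib/libpkcs11.so to ${dst}" && \
//	  mkdir -p $(dirname $dst) && cp -r /usr/local/lib/libpkcs11.so $dst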