github.com/jlmeeker/kismatic@v1.10.1-0.20180612190640-57f9005a1f1a/pkg/install/execute.go (about) 1 package install 2 3 import ( 4 "errors" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "os" 9 "path/filepath" 10 "time" 11 12 "strings" 13 14 "github.com/apprenda/kismatic/pkg/ansible" 15 "github.com/apprenda/kismatic/pkg/install/explain" 16 "github.com/apprenda/kismatic/pkg/tls" 17 "github.com/apprenda/kismatic/pkg/util" 18 ) 19 20 // The PreFlightExecutor will run pre-flight checks against the 21 // environment defined in the plan file 22 type PreFlightExecutor interface { 23 RunPreFlightCheck(plan *Plan, nodes ...string) error 24 RunNewNodePreFlightCheck(Plan, Node) error 25 RunUpgradePreFlightCheck(*Plan, ListableNode) error 26 } 27 28 // The Executor will carry out the installation plan 29 type Executor interface { 30 PreFlightExecutor 31 Install(plan *Plan, restartServices bool, nodes ...string) error 32 Reset(plan *Plan, nodes ...string) error 33 GenerateCertificates(p *Plan, useExistingCA bool) error 34 RunSmokeTest(*Plan) error 35 AddNode(plan *Plan, node Node, roles []string, restartServices bool) (*Plan, error) 36 RunPlay(name string, plan *Plan, restartServices bool, nodes ...string) error 37 AddVolume(*Plan, StorageVolume) error 38 DeleteVolume(*Plan, string) error 39 UpgradeNodes(plan Plan, nodesToUpgrade []ListableNode, onlineUpgrade bool, maxParallelWorkers int, restartServices bool) error 40 ValidateControlPlane(plan Plan) error 41 UpgradeClusterServices(plan Plan) error 42 } 43 44 // DiagnosticsExecutor will run diagnostics on the nodes after an install 45 type DiagnosticsExecutor interface { 46 DiagnoseNodes(plan Plan) error 47 } 48 49 // ExecutorOptions are used to configure the executor 50 type ExecutorOptions struct { 51 // GeneratedAssetsDirectory is the location where generated assets 52 // are to be stored 53 GeneratedAssetsDirectory string 54 // OutputFormat sets the format of the executor 55 OutputFormat string 56 // Verbose output from the executor 57 Verbose bool 58 // RunsDirectory is where information about installation runs is kept 59 RunsDirectory string 60 // DiagnosticsDirecty is where the doDiagnostics information about the cluster will be dumped 61 DiagnosticsDirecty string 62 // DryRun determines if the executor should actually run the task 63 DryRun bool 64 } 65 66 // NewExecutor returns an executor for performing installations according to the installation plan. 67 func NewExecutor(stdout io.Writer, errOut io.Writer, options ExecutorOptions) (Executor, error) { 68 ansibleDir := "ansible" 69 if options.GeneratedAssetsDirectory == "" { 70 return nil, fmt.Errorf("GeneratedAssetsDirectory option cannot be empty") 71 } 72 if options.RunsDirectory == "" { 73 options.RunsDirectory = "./runs" 74 } 75 76 // Setup the console output format 77 var outFormat ansible.OutputFormat 78 switch options.OutputFormat { 79 case "raw": 80 outFormat = ansible.RawFormat 81 case "simple": 82 outFormat = ansible.JSONLinesFormat 83 default: 84 return nil, fmt.Errorf("Output format %q is not supported", options.OutputFormat) 85 } 86 certsDir := filepath.Join(options.GeneratedAssetsDirectory, "keys") 87 pki := &LocalPKI{ 88 CACsr: filepath.Join(ansibleDir, "playbooks", "tls", "ca-csr.json"), 89 GeneratedCertsDirectory: certsDir, 90 Log: stdout, 91 } 92 return &ansibleExecutor{ 93 options: options, 94 stdout: stdout, 95 consoleOutputFormat: outFormat, 96 ansibleDir: ansibleDir, 97 certsDir: certsDir, 98 pki: pki, 99 }, nil 100 } 101 102 // NewPreFlightExecutor returns an executor for running preflight 103 func NewPreFlightExecutor(stdout io.Writer, errOut io.Writer, options ExecutorOptions) (PreFlightExecutor, error) { 104 ansibleDir := "ansible" 105 if options.RunsDirectory == "" { 106 options.RunsDirectory = "./runs" 107 } 108 // Setup the console output format 109 var outFormat ansible.OutputFormat 110 switch options.OutputFormat { 111 case "raw": 112 outFormat = ansible.RawFormat 113 case "simple": 114 outFormat = ansible.JSONLinesFormat 115 default: 116 return nil, fmt.Errorf("Output format %q is not supported", options.OutputFormat) 117 } 118 119 return &ansibleExecutor{ 120 options: options, 121 stdout: stdout, 122 consoleOutputFormat: outFormat, 123 ansibleDir: ansibleDir, 124 }, nil 125 } 126 127 // NewDiagnosticsExecutor returns an executor for running preflight 128 func NewDiagnosticsExecutor(stdout io.Writer, errOut io.Writer, options ExecutorOptions) (DiagnosticsExecutor, error) { 129 ansibleDir := "ansible" 130 if options.RunsDirectory == "" { 131 options.RunsDirectory = "./runs" 132 } 133 if options.DiagnosticsDirecty == "" { 134 wd, err := os.Getwd() 135 if err != nil { 136 return nil, fmt.Errorf("Could not get working directory: %v", err) 137 } 138 options.DiagnosticsDirecty = filepath.Join(wd, "diagnostics") 139 } 140 141 // Setup the console output format 142 var outFormat ansible.OutputFormat 143 switch options.OutputFormat { 144 case "raw": 145 outFormat = ansible.RawFormat 146 case "simple": 147 outFormat = ansible.JSONLinesFormat 148 default: 149 return nil, fmt.Errorf("Output format %q is not supported", options.OutputFormat) 150 } 151 152 return &ansibleExecutor{ 153 options: options, 154 stdout: stdout, 155 consoleOutputFormat: outFormat, 156 ansibleDir: ansibleDir, 157 }, nil 158 } 159 160 type ansibleExecutor struct { 161 options ExecutorOptions 162 stdout io.Writer 163 consoleOutputFormat ansible.OutputFormat 164 ansibleDir string 165 certsDir string 166 pki PKI 167 168 // Hook for testing purposes.. default implementation is used at runtime 169 runnerExplainerFactory func(explain.AnsibleEventExplainer, io.Writer) (ansible.Runner, *explain.AnsibleEventStreamExplainer, error) 170 } 171 172 type task struct { 173 // name of the task used for the runs dir 174 name string 175 // the inventory of nodes to use 176 inventory ansible.Inventory 177 // the cluster catalog to use 178 clusterCatalog ansible.ClusterCatalog 179 // the playbook filename 180 playbook string 181 // the explainer to use 182 explainer explain.AnsibleEventExplainer 183 // the plan 184 plan Plan 185 // run the task on specific nodes 186 limit []string 187 } 188 189 // execute will run the given task, and setup all what's needed for us to run ansible. 190 func (ae *ansibleExecutor) execute(t task) error { 191 if ae.options.DryRun { 192 return nil 193 } 194 runDirectory, err := ae.createRunDirectory(t.name) 195 if err != nil { 196 return fmt.Errorf("error creating working directory for %q: %v", t.name, err) 197 } 198 // Save the plan file that was used for this execution 199 fp := FilePlanner{ 200 File: filepath.Join(runDirectory, "kismatic-cluster.yaml"), 201 } 202 if err = fp.Write(&t.plan); err != nil { 203 return fmt.Errorf("error recording plan file to %s: %v", fp.File, err) 204 } 205 ansibleLogFilename := filepath.Join(runDirectory, "ansible.log") 206 ansibleLogFile, err := os.Create(ansibleLogFilename) 207 if err != nil { 208 return fmt.Errorf("error creating ansible log file %q: %v", ansibleLogFilename, err) 209 } 210 runner, explainer, err := ae.ansibleRunnerWithExplainer(t.explainer, ansibleLogFile, runDirectory) 211 if err != nil { 212 return err 213 } 214 215 // Start running ansible with the given playbook 216 var eventStream <-chan ansible.Event 217 if t.limit != nil && len(t.limit) != 0 { 218 eventStream, err = runner.StartPlaybookOnNode(t.playbook, t.inventory, t.clusterCatalog, t.limit...) 219 } else { 220 eventStream, err = runner.StartPlaybook(t.playbook, t.inventory, t.clusterCatalog) 221 } 222 if err != nil { 223 return fmt.Errorf("error running ansible playbook: %v", err) 224 } 225 // Ansible blocks until explainer starts reading from stream. Start 226 // explainer in a separate go routine 227 go explainer.Explain(eventStream) 228 229 // Wait until ansible exits 230 if err = runner.WaitPlaybook(); err != nil { 231 return fmt.Errorf("error running playbook: %v", err) 232 } 233 return nil 234 } 235 236 // GenerateCertificatesprivate generates keys and certificates for the cluster, if needed 237 func (ae *ansibleExecutor) GenerateCertificates(p *Plan, useExistingCA bool) error { 238 if err := os.MkdirAll(ae.certsDir, 0777); err != nil { 239 return fmt.Errorf("error creating directory %s for storing TLS assets: %v", ae.certsDir, err) 240 } 241 242 // Generate cluster Certificate Authority 243 util.PrintHeader(ae.stdout, "Configuring Certificates", '=') 244 245 var clusterCACert *tls.CA 246 var err error 247 if useExistingCA { 248 exists, err := ae.pki.CertificateAuthorityExists() 249 if err != nil { 250 return fmt.Errorf("error checking if CA exists: %v", err) 251 } 252 if !exists { 253 return errors.New("The Certificate Authority is required, but it was not found.") 254 } 255 clusterCACert, err = ae.pki.GetClusterCA() 256 if err != nil { 257 return fmt.Errorf("error reading CA certificate: %v", err) 258 } 259 260 } else { 261 clusterCACert, err = ae.pki.GenerateClusterCA(p) 262 if err != nil { 263 return fmt.Errorf("error generating CA for the cluster: %v", err) 264 } 265 } 266 267 proxyClientCACert, err := ae.pki.GenerateProxyClientCA(p) 268 if err != nil { 269 return fmt.Errorf("error generating CA for the proxy client: %v", err) 270 } 271 272 // Generate node and user certificates 273 err = ae.pki.GenerateClusterCertificates(p, clusterCACert, proxyClientCACert) 274 if err != nil { 275 return fmt.Errorf("error generating certificates for the cluster: %v", err) 276 } 277 278 util.PrettyPrintOk(ae.stdout, "Cluster certificates can be found in the %q directory", ae.options.GeneratedAssetsDirectory) 279 return nil 280 } 281 282 // Install the cluster according to the installation plan 283 func (ae *ansibleExecutor) Install(p *Plan, restartServices bool, nodes ...string) error { 284 // Build the ansible inventory 285 cc, err := ae.buildClusterCatalog(p) 286 if err != nil { 287 return err 288 } 289 if restartServices { 290 cc.EnableRestart() 291 } 292 t := task{ 293 name: "apply", 294 playbook: "kubernetes.yaml", 295 plan: *p, 296 inventory: buildInventoryFromPlan(p), 297 clusterCatalog: *cc, 298 explainer: ae.defaultExplainer(), 299 limit: nodes, 300 } 301 util.PrintHeader(ae.stdout, "Installing Cluster", '=') 302 return ae.execute(t) 303 } 304 305 func (ae *ansibleExecutor) Reset(p *Plan, nodes ...string) error { 306 cc, err := ae.buildClusterCatalog(p) 307 if err != nil { 308 return err 309 } 310 t := task{ 311 name: "reset", 312 playbook: "reset.yaml", 313 explainer: ae.defaultExplainer(), 314 plan: *p, 315 inventory: buildInventoryFromPlan(p), 316 clusterCatalog: *cc, 317 limit: nodes, 318 } 319 util.PrintHeader(ae.stdout, "Resetting Nodes in the Cluster", '=') 320 return ae.execute(t) 321 } 322 323 func (ae *ansibleExecutor) RunSmokeTest(p *Plan) error { 324 cc, err := ae.buildClusterCatalog(p) 325 if err != nil { 326 return err 327 } 328 t := task{ 329 name: "smoketest", 330 playbook: "smoketest.yaml", 331 explainer: ae.defaultExplainer(), 332 plan: *p, 333 inventory: buildInventoryFromPlan(p), 334 clusterCatalog: *cc, 335 } 336 util.PrintHeader(ae.stdout, "Running Smoke Test", '=') 337 return ae.execute(t) 338 } 339 340 // RunPreflightCheck against the nodes defined in the plan 341 func (ae *ansibleExecutor) RunPreFlightCheck(p *Plan, nodes ...string) error { 342 cc, err := ae.buildClusterCatalog(p) 343 if err != nil { 344 return err 345 } 346 t := task{ 347 name: "preflight", 348 playbook: "preflight.yaml", 349 inventory: buildInventoryFromPlan(p), 350 clusterCatalog: *cc, 351 explainer: ae.preflightExplainer(), 352 plan: *p, 353 limit: nodes, 354 } 355 return ae.execute(t) 356 } 357 358 // RunNewNodePreFlightCheck runs the preflight checks against a new node 359 func (ae *ansibleExecutor) RunNewNodePreFlightCheck(p Plan, node Node) error { 360 cc, err := ae.buildClusterCatalog(&p) 361 if err != nil { 362 return err 363 } 364 t := task{ 365 name: "copy-inspector", 366 playbook: "copy-inspector.yaml", 367 inventory: buildInventoryFromPlan(&p), 368 clusterCatalog: *cc, 369 explainer: ae.preflightExplainer(), 370 plan: p, 371 } 372 if err := ae.execute(t); err != nil { 373 return err 374 } 375 376 p.Worker.ExpectedCount++ 377 p.Worker.Nodes = append(p.Worker.Nodes, node) 378 t = task{ 379 name: "add-node-preflight", 380 playbook: "preflight.yaml", 381 inventory: buildInventoryFromPlan(&p), 382 clusterCatalog: *cc, 383 explainer: ae.preflightExplainer(), 384 plan: p, 385 limit: []string{node.Host}, 386 } 387 return ae.execute(t) 388 } 389 390 func (ae *ansibleExecutor) RunUpgradePreFlightCheck(p *Plan, node ListableNode) error { 391 inventory := buildInventoryFromPlan(p) 392 cc, err := ae.buildClusterCatalog(p) 393 if err != nil { 394 return err 395 } 396 t := task{ 397 name: "copy-inspector", 398 playbook: "copy-inspector.yaml", 399 inventory: buildInventoryFromPlan(p), 400 clusterCatalog: *cc, 401 explainer: ae.preflightExplainer(), 402 plan: *p, 403 } 404 if err := ae.execute(t); err != nil { 405 return err 406 } 407 t = task{ 408 name: "upgrade-preflight", 409 playbook: "upgrade-preflight.yaml", 410 explainer: ae.preflightExplainer(), 411 plan: *p, 412 inventory: inventory, 413 clusterCatalog: *cc, 414 limit: []string{node.Node.Host}, 415 } 416 return ae.execute(t) 417 } 418 419 func (ae *ansibleExecutor) RunPlay(playName string, p *Plan, restartServices bool, nodes ...string) error { 420 cc, err := ae.buildClusterCatalog(p) 421 if err != nil { 422 return err 423 } 424 if restartServices { 425 cc.EnableRestart() 426 } 427 t := task{ 428 name: "step", 429 playbook: playName, 430 inventory: buildInventoryFromPlan(p), 431 clusterCatalog: *cc, 432 explainer: ae.defaultExplainer(), 433 plan: *p, 434 limit: nodes, 435 } 436 return ae.execute(t) 437 } 438 439 func (ae *ansibleExecutor) AddVolume(plan *Plan, volume StorageVolume) error { 440 // Validate that there are enough storage nodes to satisfy the request 441 nodesRequired := volume.ReplicateCount * volume.DistributionCount 442 if nodesRequired > len(plan.Storage.Nodes) { 443 return fmt.Errorf("the requested volume configuration requires %d storage nodes, but the cluster only has %d.", nodesRequired, len(plan.Storage.Nodes)) 444 } 445 446 cc, err := ae.buildClusterCatalog(plan) 447 if err != nil { 448 return err 449 } 450 // Add storage related vars 451 cc.VolumeName = volume.Name 452 cc.VolumeReplicaCount = volume.ReplicateCount 453 cc.VolumeDistributionCount = volume.DistributionCount 454 cc.VolumeStorageClass = volume.StorageClass 455 cc.VolumeQuotaGB = volume.SizeGB 456 cc.VolumeQuotaBytes = volume.SizeGB * (1 << (10 * 3)) 457 cc.VolumeMount = "/" 458 cc.VolumeReclaimPolicy = volume.ReclaimPolicy 459 cc.VolumeAccessModes = volume.AccessModes 460 461 // Allow nodes and pods to access volumes 462 allowedNodes := plan.Master.Nodes 463 allowedNodes = append(allowedNodes, plan.Worker.Nodes...) 464 allowedNodes = append(allowedNodes, plan.Ingress.Nodes...) 465 allowedNodes = append(allowedNodes, plan.Storage.Nodes...) 466 467 allowed := volume.AllowAddresses 468 allowed = append(allowed, plan.Cluster.Networking.PodCIDRBlock) 469 for _, n := range allowedNodes { 470 ip := n.IP 471 if n.InternalIP != "" { 472 ip = n.InternalIP 473 } 474 allowed = append(allowed, ip) 475 } 476 cc.VolumeAllowedIPs = strings.Join(allowed, ",") 477 478 t := task{ 479 name: "add-volume", 480 playbook: "volume-add.yaml", 481 plan: *plan, 482 inventory: buildInventoryFromPlan(plan), 483 clusterCatalog: *cc, 484 explainer: ae.defaultExplainer(), 485 } 486 util.PrintHeader(ae.stdout, "Add Persistent Storage Volume", '=') 487 return ae.execute(t) 488 } 489 490 func (ae *ansibleExecutor) DeleteVolume(plan *Plan, name string) error { 491 cc, err := ae.buildClusterCatalog(plan) 492 if err != nil { 493 return err 494 } 495 // Add storage related vars 496 cc.VolumeName = name 497 cc.VolumeMount = "/" 498 499 t := task{ 500 name: "delete-volume", 501 playbook: "volume-delete.yaml", 502 plan: *plan, 503 inventory: buildInventoryFromPlan(plan), 504 clusterCatalog: *cc, 505 explainer: ae.defaultExplainer(), 506 } 507 util.PrintHeader(ae.stdout, "Delete Persistent Storage Volume", '=') 508 return ae.execute(t) 509 } 510 511 // UpgradeNodes upgrades the nodes of the cluster in the following phases: 512 // 1. Etcd nodes 513 // 2. Master nodes 514 // 3. Worker nodes (regardless of specialization) 515 // 516 // When a node is being upgraded, all the components of the node are upgraded, regardless of 517 // which phase of the upgrade we are in. For example, when upgrading a node that is both an etcd and master, 518 // the etcd components and the master components will be upgraded when we are in the upgrade etcd nodes 519 // phase. 520 func (ae *ansibleExecutor) UpgradeNodes(plan Plan, nodesToUpgrade []ListableNode, onlineUpgrade bool, maxParallelWorkers int, restartServices bool) error { 521 // Nodes can have multiple roles. For this reason, we need to keep track of which nodes 522 // have been upgraded to avoid re-upgrading them. 523 upgradedNodes := map[string]bool{} 524 // Upgrade etcd nodes 525 for _, nodeToUpgrade := range nodesToUpgrade { 526 for _, role := range nodeToUpgrade.Roles { 527 if role == "etcd" { 528 node := nodeToUpgrade 529 if err := ae.upgradeNodes(plan, onlineUpgrade, restartServices, node); err != nil { 530 return fmt.Errorf("error upgrading node %q: %v", node.Node.Host, err) 531 } 532 upgradedNodes[node.Node.IP] = true 533 break 534 } 535 } 536 } 537 538 // Upgrade master nodes 539 for _, nodeToUpgrade := range nodesToUpgrade { 540 if upgradedNodes[nodeToUpgrade.Node.IP] == true { 541 continue 542 } 543 for _, role := range nodeToUpgrade.Roles { 544 if role == "master" { 545 node := nodeToUpgrade 546 if err := ae.upgradeNodes(plan, onlineUpgrade, restartServices, node); err != nil { 547 return fmt.Errorf("error upgrading node %q: %v", node.Node.Host, err) 548 } 549 upgradedNodes[node.Node.IP] = true 550 break 551 } 552 } 553 } 554 555 var limitNodes []ListableNode 556 // Upgrade the rest of the nodes 557 for n, nodeToUpgrade := range nodesToUpgrade { 558 if upgradedNodes[nodeToUpgrade.Node.IP] == true { 559 continue 560 } 561 for _, role := range nodeToUpgrade.Roles { 562 if role != "etcd" && role != "master" { 563 node := nodeToUpgrade 564 limitNodes = append(limitNodes, node) 565 // don't forget to run the remaining nodes if its < maxParallelWorkers 566 if len(limitNodes) == maxParallelWorkers || n == len(nodesToUpgrade)-1 { 567 if err := ae.upgradeNodes(plan, onlineUpgrade, restartServices, limitNodes...); err != nil { 568 return fmt.Errorf("error upgrading node %q: %v", node.Node.Host, err) 569 } 570 // empty the slice 571 limitNodes = limitNodes[:0] 572 } 573 upgradedNodes[node.Node.IP] = true 574 break 575 } 576 } 577 } 578 return nil 579 } 580 581 func (ae *ansibleExecutor) upgradeNodes(plan Plan, onlineUpgrade bool, restartServices bool, nodes ...ListableNode) error { 582 inventory := buildInventoryFromPlan(&plan) 583 cc, err := ae.buildClusterCatalog(&plan) 584 if err != nil { 585 return err 586 } 587 cc.OnlineUpgrade = onlineUpgrade 588 if restartServices { 589 cc.EnableRestart() 590 } 591 var limit []string 592 nodeRoles := make(map[string][]string) 593 for _, node := range nodes { 594 limit = append(limit, node.Node.Host) 595 nodeRoles[node.Node.Host] = node.Roles 596 } 597 t := task{ 598 name: "upgrade-nodes", 599 playbook: "upgrade-nodes.yaml", 600 inventory: inventory, 601 clusterCatalog: *cc, 602 plan: plan, 603 explainer: ae.defaultExplainer(), 604 limit: limit, 605 } 606 if len(limit) == 1 { 607 util.PrintHeader(ae.stdout, fmt.Sprintf("Upgrade Node: %s %s", limit, nodes[0].Roles), '=') 608 } else { // print the roles for multiple nodes 609 util.PrintHeader(ae.stdout, "Upgrade Nodes:", '=') 610 util.PrintTable(ae.stdout, nodeRoles) 611 } 612 return ae.execute(t) 613 } 614 615 func (ae *ansibleExecutor) ValidateControlPlane(plan Plan) error { 616 inventory := buildInventoryFromPlan(&plan) 617 cc, err := ae.buildClusterCatalog(&plan) 618 if err != nil { 619 return err 620 } 621 t := task{ 622 name: "validate-control-plane", 623 playbook: "validate-control-plane.yaml", 624 inventory: inventory, 625 clusterCatalog: *cc, 626 plan: plan, 627 explainer: ae.defaultExplainer(), 628 } 629 return ae.execute(t) 630 } 631 632 func (ae *ansibleExecutor) UpgradeClusterServices(plan Plan) error { 633 inventory := buildInventoryFromPlan(&plan) 634 cc, err := ae.buildClusterCatalog(&plan) 635 if err != nil { 636 return err 637 } 638 t := task{ 639 name: "upgrade-cluster-services", 640 playbook: "upgrade-cluster-services.yaml", 641 inventory: inventory, 642 clusterCatalog: *cc, 643 plan: plan, 644 explainer: ae.defaultExplainer(), 645 } 646 return ae.execute(t) 647 } 648 649 func (ae *ansibleExecutor) DiagnoseNodes(plan Plan) error { 650 inventory := buildInventoryFromPlan(&plan) 651 cc, err := ae.buildClusterCatalog(&plan) 652 if err != nil { 653 return err 654 } 655 // dateTime will be appended to the diagnostics directory 656 now := time.Now().Format("2006-01-02-15-04-05") 657 cc.DiagnosticsDirectory = filepath.Join(ae.options.DiagnosticsDirecty, now) 658 cc.DiagnosticsDateTime = now 659 t := task{ 660 name: "diagnose", 661 playbook: "diagnose-nodes.yaml", 662 inventory: inventory, 663 clusterCatalog: *cc, 664 plan: plan, 665 explainer: ae.defaultExplainer(), 666 } 667 return ae.execute(t) 668 } 669 670 // creates the extra vars that are required for the installation playbook. 671 func (ae *ansibleExecutor) buildClusterCatalog(p *Plan) (*ansible.ClusterCatalog, error) { 672 tlsDir, err := filepath.Abs(ae.certsDir) 673 if err != nil { 674 return nil, fmt.Errorf("failed to determine absolute path to %s: %v", ae.certsDir, err) 675 } 676 677 dnsIP, err := getDNSServiceIP(p) 678 if err != nil { 679 return nil, fmt.Errorf("error getting DNS service IP: %v", err) 680 } 681 682 cc := ansible.ClusterCatalog{ 683 ClusterName: p.Cluster.Name, 684 AdminPassword: p.Cluster.AdminPassword, 685 TLSDirectory: tlsDir, 686 ServicesCIDR: p.Cluster.Networking.ServiceCIDRBlock, 687 PodCIDR: p.Cluster.Networking.PodCIDRBlock, 688 DNSServiceIP: dnsIP, 689 EnableModifyHosts: p.Cluster.Networking.UpdateHostsFiles, 690 EnablePackageInstallation: !p.Cluster.DisablePackageInstallation, 691 KismaticPreflightCheckerLinux: filepath.Join("inspector", "linux", "amd64", "kismatic-inspector"), 692 KuberangPath: filepath.Join("kuberang", "linux", "amd64", "kuberang"), 693 DisconnectedInstallation: p.Cluster.DisconnectedInstallation, 694 HTTPProxy: p.Cluster.Networking.HTTPProxy, 695 HTTPSProxy: p.Cluster.Networking.HTTPSProxy, 696 TargetVersion: KismaticVersion.String(), 697 APIServerOptions: p.Cluster.APIServerOptions.Overrides, 698 KubeControllerManagerOptions: p.Cluster.KubeControllerManagerOptions.Overrides, 699 KubeSchedulerOptions: p.Cluster.KubeSchedulerOptions.Overrides, 700 KubeProxyOptions: p.Cluster.KubeProxyOptions.Overrides, 701 KubeletOptions: p.Cluster.KubeletOptions.Overrides, 702 } 703 704 // set versions 705 cc.Versions.Kubernetes = p.Cluster.Version 706 cc.Versions.KubernetesYum = p.Cluster.Version[1:] + "-0" 707 cc.Versions.KubernetesDeb = p.Cluster.Version[1:] + "-00" 708 709 cc.NoProxy = strings.Join(p.AllAddresses(), ",") 710 if p.Cluster.Networking.NoProxy != "" { 711 cc.NoProxy = cc.NoProxy + "," + p.Cluster.Networking.NoProxy 712 } 713 714 cc.LocalKubeconfigDirectory = filepath.Join(ae.options.GeneratedAssetsDirectory, "kubeconfig") 715 // absolute path required for ansible 716 generatedDir, err := filepath.Abs(filepath.Join(ae.options.GeneratedAssetsDirectory, "kubeconfig")) 717 if err != nil { 718 return nil, fmt.Errorf("failed to determine absolute path to %s: %v", filepath.Join(ae.options.GeneratedAssetsDirectory, "kubeconfig"), err) 719 } 720 cc.LocalKubeconfigDirectory = generatedDir 721 722 // Setup FQDN or default to first master 723 if p.Master.LoadBalancedFQDN != "" { 724 cc.LoadBalancedFQDN = p.Master.LoadBalancedFQDN 725 } else { 726 cc.LoadBalancedFQDN = p.Master.Nodes[0].InternalIP 727 } 728 729 if p.PrivateRegistryProvided() { 730 cc.ConfigureDockerWithPrivateRegistry = true 731 cc.DockerRegistryServer = p.DockerRegistry.Server 732 cc.DockerRegistryCAPath = p.DockerRegistry.CAPath 733 cc.DockerRegistryUsername = p.DockerRegistry.Username 734 cc.DockerRegistryPassword = p.DockerRegistry.Password 735 } 736 737 // Setup docker options 738 cc.Docker.Enabled = !p.Docker.Disable 739 cc.Docker.Logs.Driver = p.Docker.Logs.Driver 740 cc.Docker.Logs.Opts = p.Docker.Logs.Opts 741 cc.Docker.Storage.Driver = p.Docker.Storage.Driver 742 cc.Docker.Storage.Opts = p.Docker.Storage.Opts 743 cc.Docker.Storage.OptsList = []string{} 744 // A formatted list to set in docker daemon.json 745 for k, v := range p.Docker.Storage.Opts { 746 cc.Docker.Storage.OptsList = append(cc.Docker.Storage.OptsList, fmt.Sprintf("%s=%s", k, v)) 747 } 748 cc.Docker.Storage.DirectLVMBlockDevice = ansible.DirectLVMBlockDevice{ 749 Path: p.Docker.Storage.DirectLVMBlockDevice.Path, 750 ThinpoolPercent: p.Docker.Storage.DirectLVMBlockDevice.ThinpoolPercent, 751 ThinpoolMetaPercent: p.Docker.Storage.DirectLVMBlockDevice.ThinpoolMetaPercent, 752 ThinpoolAutoextendThreshold: p.Docker.Storage.DirectLVMBlockDevice.ThinpoolAutoextendThreshold, 753 ThinpoolAutoextendPercent: p.Docker.Storage.DirectLVMBlockDevice.ThinpoolAutoextendPercent, 754 } 755 756 if p.Ingress.Nodes != nil && len(p.Ingress.Nodes) > 0 { 757 cc.EnableConfigureIngress = true 758 } else { 759 cc.EnableConfigureIngress = false 760 } 761 762 if p.NFS != nil { 763 for _, n := range p.NFS.Volumes { 764 cc.NFSVolumes = append(cc.NFSVolumes, ansible.NFSVolume{ 765 Path: n.Path, 766 Host: n.Host, 767 }) 768 } 769 } 770 771 cc.EnableGluster = p.Storage.Nodes != nil && len(p.Storage.Nodes) > 0 772 773 cc.CloudProvider = p.Cluster.CloudProvider.Provider 774 cc.CloudConfig = p.Cluster.CloudProvider.Config 775 776 // additional files 777 for _, n := range p.AdditionalFiles { 778 cc.AdditionalFiles = append(cc.AdditionalFiles, ansible.AdditionalFile{ 779 Source: n.Source, 780 Destination: n.Destination, 781 Hosts: n.Hosts, 782 }) 783 } 784 785 // add_ons 786 cc.RunPodValidation = p.NetworkConfigured() 787 // CNI 788 if p.AddOns.CNI != nil && !p.AddOns.CNI.Disable { 789 cc.CNI.Enabled = true 790 cc.CNI.Provider = p.AddOns.CNI.Provider 791 cc.CNI.Options.Portmap.Enabled = !p.AddOns.CNI.Options.Portmap.Disable 792 // Calico 793 cc.CNI.Options.Calico.Mode = p.AddOns.CNI.Options.Calico.Mode 794 cc.CNI.Options.Calico.LogLevel = p.AddOns.CNI.Options.Calico.LogLevel 795 cc.CNI.Options.Calico.WorkloadMTU = p.AddOns.CNI.Options.Calico.WorkloadMTU 796 cc.CNI.Options.Calico.FelixInputMTU = p.AddOns.CNI.Options.Calico.FelixInputMTU 797 cc.CNI.Options.Calico.IPAutodetectionMethod = p.AddOns.CNI.Options.Calico.IPAutodetectionMethod 798 // Weave 799 cc.CNI.Options.Weave.Password = p.AddOns.CNI.Options.Weave.Password 800 if cc.CNI.Provider == cniProviderContiv { 801 cc.InsecureNetworkingEtcd = true 802 } 803 } 804 805 // DNS 806 cc.DNS.Enabled = !p.AddOns.DNS.Disable 807 cc.DNS.Provider = p.AddOns.DNS.Provider 808 cc.DNS.Options.Replicas = p.AddOns.DNS.Options.Replicas 809 810 // heapster 811 if p.AddOns.HeapsterMonitoring != nil && !p.AddOns.HeapsterMonitoring.Disable { 812 cc.Heapster.Enabled = true 813 cc.Heapster.Options.Heapster.Replicas = p.AddOns.HeapsterMonitoring.Options.Heapster.Replicas 814 cc.Heapster.Options.Heapster.ServiceType = p.AddOns.HeapsterMonitoring.Options.Heapster.ServiceType 815 cc.Heapster.Options.Heapster.Sink = p.AddOns.HeapsterMonitoring.Options.Heapster.Sink 816 cc.Heapster.Options.InfluxDB.PVCName = p.AddOns.HeapsterMonitoring.Options.InfluxDB.PVCName 817 } 818 819 // metrics-server 820 cc.MetricsServer.Enabled = !p.AddOns.MetricsServer.Disable 821 822 // dashboard 823 cc.Dashboard.Enabled = !p.AddOns.Dashboard.Disable 824 cc.Dashboard.Options.ServiceType = p.AddOns.Dashboard.Options.ServiceType 825 cc.Dashboard.Options.NodePort = p.AddOns.Dashboard.Options.NodePort 826 827 // package_manager 828 if !p.AddOns.PackageManager.Disable { 829 // Currently only helm is supported 830 switch p.AddOns.PackageManager.Provider { 831 case "helm": 832 cc.Helm.Enabled = true 833 default: 834 cc.Helm.Enabled = true 835 } 836 cc.Helm.Namespace = p.AddOns.PackageManager.Options.Helm.Namespace 837 } 838 839 cc.Rescheduler.Enabled = !p.AddOns.Rescheduler.Disable 840 841 // merge node labels 842 // cannot use inventory file because nodes share roles 843 // set it to a map[host][]key=value 844 cc.NodeLabels = make(map[string][]string) 845 for _, n := range p.getAllNodes() { 846 if val, ok := cc.NodeLabels[n.Host]; ok { 847 cc.NodeLabels[n.Host] = append(val, keyValueList(n.Labels)...) 848 } else { 849 cc.NodeLabels[n.Host] = keyValueList(n.Labels) 850 } 851 } 852 // merge node taints 853 // cannot use inventory file because nodes share roles 854 // set it to a map[host][]key=value:effect 855 cc.NodeTaints = make(map[string][]string) 856 for _, n := range p.getAllNodes() { 857 if val, ok := cc.NodeTaints[n.Host]; ok { 858 cc.NodeTaints[n.Host] = append(val, keyValueEffectList(n.Taints)...) 859 } else { 860 cc.NodeTaints[n.Host] = keyValueEffectList(n.Taints) 861 } 862 } 863 864 // setup kubelet node overrides 865 cc.KubeletNodeOptions = make(map[string]map[string]string) 866 for _, n := range p.GetUniqueNodes() { 867 cc.KubeletNodeOptions[n.Host] = n.KubeletOptions.Overrides 868 } 869 870 return &cc, nil 871 } 872 873 func (ae *ansibleExecutor) createRunDirectory(runName string) (string, error) { 874 start := time.Now() 875 runDirectory := filepath.Join(ae.options.RunsDirectory, runName, start.Format("2006-01-02-15-04-05")) 876 if err := os.MkdirAll(runDirectory, 0777); err != nil { 877 return "", fmt.Errorf("error creating directory: %v", err) 878 } 879 return runDirectory, nil 880 } 881 882 func (ae *ansibleExecutor) ansibleRunnerWithExplainer(explainer explain.AnsibleEventExplainer, ansibleLog io.Writer, runDirectory string) (ansible.Runner, *explain.AnsibleEventStreamExplainer, error) { 883 if ae.runnerExplainerFactory != nil { 884 return ae.runnerExplainerFactory(explainer, ansibleLog) 885 } 886 887 // Setup sink for ansible stdout 888 var ansibleOut io.Writer 889 switch ae.consoleOutputFormat { 890 case ansible.JSONLinesFormat: 891 ansibleOut = timestampWriter(ansibleLog) 892 case ansible.RawFormat: 893 ansibleOut = io.MultiWriter(ae.stdout, timestampWriter(ansibleLog)) 894 } 895 896 // Send stdout and stderr to ansibleOut 897 runner, err := ansible.NewRunner(ansibleOut, ansibleOut, ae.ansibleDir, runDirectory) 898 if err != nil { 899 return nil, nil, fmt.Errorf("error creating ansible runner: %v", err) 900 } 901 902 streamExplainer := &explain.AnsibleEventStreamExplainer{ 903 EventExplainer: explainer, 904 } 905 906 return runner, streamExplainer, nil 907 } 908 909 func (ae *ansibleExecutor) defaultExplainer() explain.AnsibleEventExplainer { 910 var out io.Writer 911 switch ae.consoleOutputFormat { 912 case ansible.JSONLinesFormat: 913 out = ae.stdout 914 case ansible.RawFormat: 915 out = ioutil.Discard 916 } 917 return explain.DefaultExplainer(ae.options.Verbose, out) 918 } 919 920 func (ae *ansibleExecutor) preflightExplainer() explain.AnsibleEventExplainer { 921 var out io.Writer 922 switch ae.consoleOutputFormat { 923 case ansible.JSONLinesFormat: 924 out = ae.stdout 925 case ansible.RawFormat: 926 out = ioutil.Discard 927 } 928 return explain.PreflightExplainer(ae.options.Verbose, out) 929 } 930 931 func buildInventoryFromPlan(p *Plan) ansible.Inventory { 932 etcdNodes := []ansible.Node{} 933 for _, n := range p.Etcd.Nodes { 934 etcdNodes = append(etcdNodes, installNodeToAnsibleNode(&n, &p.Cluster.SSH)) 935 } 936 masterNodes := []ansible.Node{} 937 for _, n := range p.Master.Nodes { 938 masterNodes = append(masterNodes, installNodeToAnsibleNode(&n, &p.Cluster.SSH)) 939 } 940 workerNodes := []ansible.Node{} 941 for _, n := range p.Worker.Nodes { 942 workerNodes = append(workerNodes, installNodeToAnsibleNode(&n, &p.Cluster.SSH)) 943 } 944 ingressNodes := []ansible.Node{} 945 if p.Ingress.Nodes != nil { 946 for _, n := range p.Ingress.Nodes { 947 ingressNodes = append(ingressNodes, installNodeToAnsibleNode(&n, &p.Cluster.SSH)) 948 } 949 } 950 storageNodes := []ansible.Node{} 951 if p.Storage.Nodes != nil { 952 for _, n := range p.Storage.Nodes { 953 storageNodes = append(storageNodes, installNodeToAnsibleNode(&n, &p.Cluster.SSH)) 954 } 955 } 956 957 inventory := ansible.Inventory{ 958 Roles: []ansible.Role{ 959 { 960 Name: "etcd", 961 Nodes: etcdNodes, 962 }, 963 { 964 Name: "master", 965 Nodes: masterNodes, 966 }, 967 { 968 Name: "worker", 969 Nodes: workerNodes, 970 }, 971 { 972 Name: "ingress", 973 Nodes: ingressNodes, 974 }, 975 { 976 Name: "storage", 977 Nodes: storageNodes, 978 }, 979 }, 980 } 981 982 return inventory 983 } 984 985 // Converts plan node to ansible node 986 func installNodeToAnsibleNode(n *Node, s *SSHConfig) ansible.Node { 987 return ansible.Node{ 988 Host: n.Host, 989 PublicIP: n.IP, 990 InternalIP: n.InternalIP, 991 SSHPrivateKey: s.Key, 992 SSHUser: s.User, 993 SSHPort: s.Port, 994 } 995 } 996 997 // Prepend each line of the incoming stream with a timestamp 998 func timestampWriter(out io.Writer) io.Writer { 999 pr, pw := io.Pipe() 1000 go func(r io.Reader) { 1001 lr := util.NewLineReader(r, 64*1024) 1002 var ( 1003 err error 1004 line []byte 1005 ) 1006 for err == nil { 1007 line, err = lr.Read() 1008 fmt.Fprintf(out, "%s - %s\n", time.Now().UTC().Format("2006-01-02 15:04:05.000-0700"), string(line)) 1009 } 1010 if err != io.EOF { 1011 fmt.Printf("Error timestamping ansible logs: %v", err) 1012 } 1013 }(pr) 1014 return pw 1015 } 1016 1017 // key=value slice 1018 func keyValueList(in map[string]string) []string { 1019 pairs := make([]string, 0, len(in)) 1020 for k, v := range in { 1021 pairs = append(pairs, fmt.Sprintf("%s=%s", k, v)) 1022 } 1023 return pairs 1024 } 1025 1026 func keyValueEffectList(in []Taint) []string { 1027 taints := make([]string, 0, len(in)) 1028 for _, taint := range in { 1029 taints = append(taints, fmt.Sprintf("%s=%s:%s", taint.Key, taint.Value, taint.Effect)) 1030 } 1031 return taints 1032 }