github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachprod/vm/aws/support.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package aws
    12  
import (
	"bytes"
	"encoding/json"
	"io/ioutil"
	"log"
	"os"
	"os/exec"
	"strings"
	"text/template"

	"github.com/cockroachdb/cockroach/pkg/cmd/roachprod/vm"
	"github.com/cockroachdb/errors"
)
    25  
// awsStartupScriptTemplate is the text/template source of the bash user-data
// script that prepares a newly created AWS machine for roachprod use.
//
// Both M5 and I3 machines expose their EBS or local SSD volumes as NVMe block
// devices, but the actual device numbers vary a bit between the two types.
// This user-data script will create a filesystem, mount the data volume, and
// chmod 777.
// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html
//
// In order, the script:
//   - installs mdadm and collects every /dev/nvme?n1 and /dev/xvdd device that
//     does not show up in the output of mount;
//   - prepares /mnt/data1: with no such device it only creates the directory;
//     with exactly one it formats the device as ext4 and mounts it; with
//     several it first combines them into a RAID 0 array at /dev/md0. The
//     mountpoint is always chmod 777 and any mounted device is appended to
//     /etc/fstab;
//   - installs chrony, adds 169.254.169.123 as a preferred server, restarts
//     chrony and waits for synchronization;
//   - stops sshguard, appends a MaxStartups setting to sshd_config, rewrites
//     LogLevel to DEBUG3 and restarts sshd;
//   - writes nofile limits and unlimited core limits under
//     /etc/security/limits.d and directs core files to /mnt/data1/cores;
//   - touches /mnt/data1/.roachprod-initialized as its final step.
//
// This is a template because the instantiator needs to optionally configure the
// mounting options. The script cannot take arguments since it is to be invoked
// by the aws tool which cannot pass args. The only template input is
// .ExtraMountOpts; when non-empty it is appended, comma-separated, to the
// default "discard,defaults" mount options.
//
// NOTE(review): this is a raw string literal, so the doubled backslashes in
// the sed expression that edits /etc/sysctl.conf reach the shell unchanged.
// Confirm that the pattern still matches existing kernel.core_pattern lines.
const awsStartupScriptTemplate = `#!/usr/bin/env bash
# Script for setting up a AWS machine for roachprod use.

set -x
sudo apt-get update
sudo apt-get install -qy --no-install-recommends mdadm

mount_opts="discard,defaults"
{{if .ExtraMountOpts}}mount_opts="${mount_opts},{{.ExtraMountOpts}}"{{end}}

disks=()
mountpoint="/mnt/data1"
# On different machine types, the drives are either called nvme... or xvdd.
for d in $(ls /dev/nvme?n1 /dev/xvdd); do
  if ! mount | grep ${d}; then
    disks+=("${d}")
    echo "Disk ${d} not mounted, creating..."
  else
    echo "Disk ${d} already mounted, skipping..."
  fi
done
if [ "${#disks[@]}" -eq "0" ]; then
  echo "No disks mounted, creating ${mountpoint}"
  mkdir -p ${mountpoint}
  chmod 777 ${mountpoint}
elif [ "${#disks[@]}" -eq "1" ]; then
  echo "One disk mounted, creating ${mountpoint}"
  mkdir -p ${mountpoint}
  disk=${disks[0]}
  mkfs.ext4 -E nodiscard ${disk}
  mount -o ${mount_opts} ${disk} ${mountpoint}
  chmod 777 ${mountpoint}
  echo "${disk} ${mountpoint} ext4 ${mount_opts} 1 1" | tee -a /etc/fstab
else
  echo "${#disks[@]} disks mounted, creating ${mountpoint} using RAID 0"
  mkdir -p ${mountpoint}
  raiddisk="/dev/md0"
  mdadm --create ${raiddisk} --level=0 --raid-devices=${#disks[@]} "${disks[@]}"
  mkfs.ext4 -E nodiscard ${raiddisk}
  mount -o ${mount_opts} ${raiddisk} ${mountpoint}
  chmod 777 ${mountpoint}
  echo "${raiddisk} ${mountpoint} ext4 ${mount_opts} 1 1" | tee -a /etc/fstab
fi

sudo apt-get install -qy chrony
echo -e "\nserver 169.254.169.123 prefer iburst" | sudo tee -a /etc/chrony/chrony.conf
echo -e "\nmakestep 0.1 3" | sudo tee -a /etc/chrony/chrony.conf
sudo /etc/init.d/chrony restart
sudo chronyc -a waitsync 30 0.01 | sudo tee -a /root/chrony.log

# sshguard can prevent frequent ssh connections to the same host. Disable it.
sudo service sshguard stop
# increase the number of concurrent unauthenticated connections to the sshd
# daemon. See https://en.wikibooks.org/wiki/OpenSSH/Cookbook/Load_Balancing.
# By default, only 10 unauthenticated connections are permitted before sshd
# starts randomly dropping connections.
sudo sh -c 'echo "MaxStartups 64:30:128" >> /etc/ssh/sshd_config'
# Crank up the logging for issues such as:
# https://github.com/cockroachdb/cockroach/issues/36929
sudo sed -i'' 's/LogLevel.*$/LogLevel DEBUG3/' /etc/ssh/sshd_config
sudo service sshd restart
# increase the default maximum number of open file descriptors for
# root and non-root users. Load generators running a lot of concurrent
# workers bump into this often.
sudo sh -c 'echo "root - nofile 65536\n* - nofile 65536" > /etc/security/limits.d/10-roachprod-nofiles.conf'

# Enable core dumps
cat <<EOF > /etc/security/limits.d/core_unlimited.conf
* soft core unlimited
* hard core unlimited
root soft core unlimited
root hard core unlimited
EOF

mkdir -p /mnt/data1/cores
chmod a+w /mnt/data1/cores
CORE_PATTERN="/mnt/data1/cores/core.%e.%p.%h.%t"
echo "$CORE_PATTERN" > /proc/sys/kernel/core_pattern
sed -i'~' 's/enabled=1/enabled=0/' /etc/default/apport
sed -i'~' '/.*kernel\\.core_pattern.*/c\\' /etc/sysctl.conf
echo "kernel.core_pattern=$CORE_PATTERN" >> /etc/sysctl.conf

sysctl --system  # reload sysctl settings

sudo touch /mnt/data1/.roachprod-initialized
`
   121  
   122  // writeStartupScript writes the startup script to a temp file.
   123  // Returns the path to the file.
   124  // After use, the caller should delete the temp file.
   125  //
   126  // extraMountOpts, if not empty, is appended to the default mount options. It is
   127  // a comma-separated list of options for the "mount -o" flag.
   128  func writeStartupScript(extraMountOpts string) (string, error) {
   129  	type tmplParams struct {
   130  		ExtraMountOpts string
   131  	}
   132  
   133  	args := tmplParams{ExtraMountOpts: extraMountOpts}
   134  
   135  	tmpfile, err := ioutil.TempFile("", "aws-startup-script")
   136  	if err != nil {
   137  		return "", err
   138  	}
   139  	defer tmpfile.Close()
   140  
   141  	t := template.Must(template.New("start").Parse(awsStartupScriptTemplate))
   142  	if err := t.Execute(tmpfile, args); err != nil {
   143  		return "", err
   144  	}
   145  	return tmpfile.Name(), nil
   146  }
   147  
   148  // runCommand is used to invoke an AWS command.
   149  func (p *Provider) runCommand(args []string) ([]byte, error) {
   150  
   151  	if p.opts.Profile != "" {
   152  		args = append(args[:len(args):len(args)], "--profile", p.opts.Profile)
   153  	}
   154  	var stderrBuf bytes.Buffer
   155  	cmd := exec.Command("aws", args...)
   156  	cmd.Stderr = &stderrBuf
   157  	output, err := cmd.Output()
   158  	if err != nil {
   159  		if exitErr := (*exec.ExitError)(nil); errors.As(err, &exitErr) {
   160  			log.Println(string(exitErr.Stderr))
   161  		}
   162  		return nil, errors.Wrapf(err, "failed to run: aws %s: stderr: %v",
   163  			strings.Join(args, " "), stderrBuf.String())
   164  	}
   165  	return output, nil
   166  }
   167  
   168  // runJSONCommand invokes an aws command and parses the json output.
   169  func (p *Provider) runJSONCommand(args []string, parsed interface{}) error {
   170  	// Force json output in case the user has overridden the default behavior.
   171  	args = append(args[:len(args):len(args)], "--output", "json")
   172  	rawJSON, err := p.runCommand(args)
   173  	if err != nil {
   174  		return err
   175  	}
   176  	if err := json.Unmarshal(rawJSON, &parsed); err != nil {
   177  		return errors.Wrapf(err, "failed to parse json %s", rawJSON)
   178  	}
   179  
   180  	return nil
   181  }
   182  
   183  // regionMap collates VM instances by their region.
   184  func regionMap(vms vm.List) (map[string]vm.List, error) {
   185  	// Fan out the work by region
   186  	byRegion := make(map[string]vm.List)
   187  	for _, m := range vms {
   188  		region, err := zoneToRegion(m.Zone)
   189  		if err != nil {
   190  			return nil, err
   191  		}
   192  		byRegion[region] = append(byRegion[region], m)
   193  	}
   194  	return byRegion, nil
   195  }
   196  
   197  // zoneToRegion converts an availability zone like us-east-2a to the zone name us-east-2
   198  func zoneToRegion(zone string) (string, error) {
   199  	return zone[0 : len(zone)-1], nil
   200  }