github.com/coreos/mantle@v0.13.0/kola/tests/etcd/rhcos.go (about)

     1  // Copyright 2018 Red Hat, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package etcd
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/coreos/mantle/kola/cluster"
    24  	"github.com/coreos/mantle/kola/register"
    25  	"github.com/coreos/mantle/platform/conf"
    26  	"github.com/coreos/mantle/util"
    27  )
    28  
    29  func init() {
    30  	register.Register(&register.Test{
    31  		Run:         rhcosClusterInsecure,
    32  		ClusterSize: 3,
    33  		Name:        "rhcos.etcd.cluster.insecure",
    34  		UserData: conf.Ignition(`{
    35    "ignition": { "version": "2.2.0" },
    36    "systemd": {
    37      "units": [
    38        {
    39          "name": "etcd.service",
    40          "enable": false,
    41          "contents": "[Unit]\nDescription=etcd in podman\nWants=network-online.target\nAfter=network-online.target\n\n[Service]\nType=simple\nRestart=on-failure\nRestartSec=10s\nExecStart=/usr/bin/podman run --name=etcd --net=host quay.io/coreos/etcd /usr/local/bin/etcd --name=${NODE_NAME} --initial-advertise-peer-urls=http://${NODE_IP}:2380 --listen-peer-urls=http://${NODE_IP}:2380 --advertise-client-urls=http://${NODE_IP}:2379 --listen-client-urls=http://${NODE_IP}:2379,http://127.0.0.1:2379 --initial-cluster=${CLUSTER} --initial-cluster-state=new --initial-cluster-token=${TOKEN}\n\n[Install]\nWantedBy=multi-user.target\n",
    42          "dropins": [{
    43            "name": "cluster.conf",
    44            "contents": "# placeholder"
    45          }]
    46        }
    47      ]
    48    }
    49  }`),
    50  		UserDataV3: conf.Ignition(`{
    51    "ignition": { "version": "3.0.0" },
    52    "systemd": {
    53      "units": [
    54        {
    55          "name": "etcd.service",
    56          "enable": false,
    57          "contents": "[Unit]\nDescription=etcd in podman\nWants=network-online.target\nAfter=network-online.target\n\n[Service]\nType=simple\nRestart=on-failure\nRestartSec=10s\nExecStart=/usr/bin/podman run --name=etcd --net=host quay.io/coreos/etcd /usr/local/bin/etcd --name=${NODE_NAME} --initial-advertise-peer-urls=http://${NODE_IP}:2380 --listen-peer-urls=http://${NODE_IP}:2380 --advertise-client-urls=http://${NODE_IP}:2379 --listen-client-urls=http://${NODE_IP}:2379,http://127.0.0.1:2379 --initial-cluster=${CLUSTER} --initial-cluster-state=new --initial-cluster-token=${TOKEN}\n\n[Install]\nWantedBy=multi-user.target\n",
    58          "dropins": [{
    59            "name": "cluster.conf",
    60            "contents": "# placeholder"
    61          }]
    62        }
    63      ]
    64    }
    65  }`),
    66  		Flags:   []register.Flag{register.RequiresInternetAccess}, // fetching etcd requires networking
    67  		Distros: []string{"rhcos"},
    68  	})
    69  	register.Register(&register.Test{
    70  		Run:         rhcosClusterTLS,
    71  		ClusterSize: 3,
    72  		Name:        "rhcos.etcd.cluster.tls",
    73  		UserData: conf.Ignition(`{
    74    "ignition": { "version": "2.2.0" },
    75    "systemd": {
    76      "units": [
    77        {
    78          "name": "etcd.service",
    79          "enable": false,
    80          "contents": "[Unit]\nDescription=etcd in podman\nWants=network-online.target\nAfter=network-online.target\n\n[Service]\nType=simple\nRestart=on-failure\nRestartSec=10s\nExecStart=/usr/bin/podman run --name=etcd --net=host --security-opt=label=disable --volume=/etc/ssl/certs:/etc/ssl/certs:ro quay.io/coreos/etcd /usr/local/bin/etcd --name=${NODE_NAME} --initial-advertise-peer-urls=https://${NODE_IP}:2380 --listen-peer-urls=https://${NODE_IP}:2380 --advertise-client-urls=https://${NODE_IP}:2379 --listen-client-urls=https://${NODE_IP}:2379,http://127.0.0.1:2379 --initial-cluster=${CLUSTER} --initial-cluster-state=new --initial-cluster-token=${TOKEN} --cert-file=/etc/ssl/certs/etcd-cert.pem --key-file=/etc/ssl/certs/etcd-key.pem --peer-cert-file=/etc/ssl/certs/peer-cert.pem --peer-key-file=/etc/ssl/certs/peer-key.pem --peer-client-cert-auth --peer-trusted-ca-file=/etc/ssl/certs/ca-peer-cert.pem\n\n[Install]\nWantedBy=multi-user.target\n",
    81          "dropins": [{
    82            "name": "cluster.conf",
    83            "contents": "# placeholder"
    84          }]
    85        }
    86      ]
    87    },
    88    "storage": {
    89      "files": [
    90        {
    91          "filesystem": "root",
    92          "path": "/etc/ssl/etcd.cnf",
    93          "contents": { "source": "data:,%5Breq%5D%0Adistinguished_name=req%0A%5Betcd_ca%5D%0AbasicConstraints=CA:true%0AkeyUsage=keyCertSign,cRLSign%0AsubjectKeyIdentifier=hash%0A%5Betcd_peer%5D%0AbasicConstraints=CA:FALSE%0AextendedKeyUsage=clientAuth,serverAuth%0AkeyUsage=digitalSignature,keyEncipherment%0AsubjectAltName=@sans%0A%5Betcd_server%5D%0AbasicConstraints=CA:FALSE%0AextendedKeyUsage=serverAuth%0AkeyUsage=digitalSignature,keyEncipherment%0AsubjectAltName=@sans%0A%5Bsans%5D%0ADNS.1=localhost%0AIP.1=127.0.0.1%0A" },
    94          "mode": 420
    95        }
    96      ]
    97    }
    98  }`),
    99  		UserDataV3: conf.Ignition(`{
   100    "ignition": { "version": "3.0.0" },
   101    "systemd": {
   102      "units": [
   103        {
   104          "name": "etcd.service",
   105          "enable": false,
   106          "contents": "[Unit]\nDescription=etcd in podman\nWants=network-online.target\nAfter=network-online.target\n\n[Service]\nType=simple\nRestart=on-failure\nRestartSec=10s\nExecStart=/usr/bin/podman run --name=etcd --net=host --security-opt=label=disable --volume=/etc/ssl/certs:/etc/ssl/certs:ro quay.io/coreos/etcd /usr/local/bin/etcd --name=${NODE_NAME} --initial-advertise-peer-urls=https://${NODE_IP}:2380 --listen-peer-urls=https://${NODE_IP}:2380 --advertise-client-urls=https://${NODE_IP}:2379 --listen-client-urls=https://${NODE_IP}:2379,http://127.0.0.1:2379 --initial-cluster=${CLUSTER} --initial-cluster-state=new --initial-cluster-token=${TOKEN} --cert-file=/etc/ssl/certs/etcd-cert.pem --key-file=/etc/ssl/certs/etcd-key.pem --peer-cert-file=/etc/ssl/certs/peer-cert.pem --peer-key-file=/etc/ssl/certs/peer-key.pem --peer-client-cert-auth --peer-trusted-ca-file=/etc/ssl/certs/ca-peer-cert.pem\n\n[Install]\nWantedBy=multi-user.target\n",
   107          "dropins": [{
   108            "name": "cluster.conf",
   109            "contents": "# placeholder"
   110          }]
   111        }
   112      ]
   113    },
   114    "storage": {
   115      "files": [
   116        {
   117          "path": "/etc/ssl/etcd.cnf",
   118          "contents": { "source": "data:,%5Breq%5D%0Adistinguished_name=req%0A%5Betcd_ca%5D%0AbasicConstraints=CA:true%0AkeyUsage=keyCertSign,cRLSign%0AsubjectKeyIdentifier=hash%0A%5Betcd_peer%5D%0AbasicConstraints=CA:FALSE%0AextendedKeyUsage=clientAuth,serverAuth%0AkeyUsage=digitalSignature,keyEncipherment%0AsubjectAltName=@sans%0A%5Betcd_server%5D%0AbasicConstraints=CA:FALSE%0AextendedKeyUsage=serverAuth%0AkeyUsage=digitalSignature,keyEncipherment%0AsubjectAltName=@sans%0A%5Bsans%5D%0ADNS.1=localhost%0AIP.1=127.0.0.1%0A" },
   119          "mode": 420
   120        }
   121      ]
   122    }
   123  }`),
   124  		Flags:   []register.Flag{register.RequiresInternetAccess}, // fetching etcd requires networking
   125  		Distros: []string{"rhcos"},
   126  	})
   127  }
   128  
   129  // Run an etcd cluster in podman without TLS or external discovery services.
   130  // Verify it works by checking the cluster health, then writing/reading keys.
   131  func rhcosClusterInsecure(c cluster.TestCluster) {
   132  	machines := c.Machines()
   133  
   134  	// Generate the initial cluster value.
   135  	cluster := ""
   136  	for index, machine := range machines {
   137  		cluster += fmt.Sprintf(",etcd%d=http://%s:2380", index, machine.PrivateIP())
   138  	}
   139  	cluster = cluster[1:]
   140  
   141  	// Configure the cluster nodes, and start them.
   142  	for index, machine := range machines {
   143  		c.MustSSH(machine, fmt.Sprintf(`set -e ; exec 2>&1
   144  sudo tee /etc/systemd/system/etcd.service.d/cluster.conf << 'EOF' > /dev/null
   145  [Service]
   146  Environment="NODE_NAME=etcd%d"
   147  Environment="NODE_IP=%s"
   148  Environment="CLUSTER=%s"
   149  Environment="TOKEN=etcd-cluster-token"
   150  EOF
   151  sudo systemctl daemon-reload
   152  sudo systemctl enable --now etcd`, index, machine.PrivateIP(), cluster))
   153  	}
   154  
   155  	// Test the reported cluster health.
   156  	if err := rhcosClusterHealth(c); err != nil {
   157  		c.Fatalf("discovery failed cluster-health check: %v", err)
   158  	}
   159  
   160  	// Test writing keys with curl over local HTTP.
   161  	var keyMap map[string]string
   162  	keyMap, err := setKeys(c, 5)
   163  	if err != nil {
   164  		c.Fatalf("failed to set keys: %v", err)
   165  	}
   166  
   167  	// Test reading keys with curl over local HTTP.
   168  	if err := checkKeys(c, keyMap); err != nil {
   169  		c.Fatalf("failed to check keys: %v", err)
   170  	}
   171  }
   172  
   173  // Run an etcd cluster in podman with TLS without discovery services.
   174  // Verify it works by checking the cluster health, then writing/reading keys.
   175  // Keys are written/read to the local node over plain HTTP so etcdctl commands
   176  // don't need cert args.  There are separate commands to test writing/reading
   177  // keys over HTTPS with the nodes' external IP addresses.  Communication
   178  // between peers uses TLS with both client and server authentication, but etcd
   179  // clients do not need to use cert auth.
   180  func rhcosClusterTLS(c cluster.TestCluster) {
   181  	machines := c.Machines()
   182  
   183  	// Generate the initial cluster value.
   184  	cluster := ""
   185  	for index, machine := range machines {
   186  		cluster += fmt.Sprintf(",etcd%d=https://%s:2380", index, machine.PrivateIP())
   187  	}
   188  	cluster = cluster[1:]
   189  
   190  	// Create the CA certs and keys.
   191  	rhcosClusterCreateCAFiles(c)
   192  
   193  	// Configure the cluster nodes, and start them.
   194  	for index, machine := range machines {
   195  		c.MustSSH(machine, fmt.Sprintf(`set -e ; exec 2>&1
   196  echo -e 'DNS.2=etcd%d\nIP.2=%s' | sudo tee -a /etc/ssl/etcd.cnf > /dev/null
   197  sudo openssl req -config /etc/ssl/etcd.cnf -x509 -nodes -newkey rsa:4096 -sha512 -days 3 -extensions etcd_server -subj '/CN=etcd%d' -out /etc/ssl/certs/etcd-cert-self.pem -keyout /etc/ssl/certs/etcd-key.pem
   198  sudo openssl req -config /etc/ssl/etcd.cnf -x509 -nodes -newkey rsa:4096 -sha512 -days 3 -extensions etcd_peer -subj '/CN=etcd%d peer' -out /etc/ssl/certs/peer-cert-self.pem -keyout /etc/ssl/certs/peer-key.pem
   199  sudo openssl x509 -CA /etc/ssl/certs/ca-etcd-cert.pem -CAkey /etc/ssl/certs/ca-etcd-key.pem -CAcreateserial -sha512 -days 3 -in /etc/ssl/certs/etcd-cert-self.pem -out /etc/ssl/certs/etcd-cert.pem
   200  sudo openssl x509 -CA /etc/ssl/certs/ca-peer-cert.pem -CAkey /etc/ssl/certs/ca-peer-key.pem -CAcreateserial -sha512 -days 3 -in /etc/ssl/certs/peer-cert-self.pem -out /etc/ssl/certs/peer-cert.pem
   201  sudo tee /etc/systemd/system/etcd.service.d/cluster.conf << 'EOF' > /dev/null
   202  [Service]
   203  Environment="NODE_NAME=etcd%d"
   204  Environment="NODE_IP=%s"
   205  Environment="CLUSTER=%s"
   206  Environment="TOKEN=etcd-cluster-token"
   207  EOF
   208  sudo systemctl daemon-reload
   209  sudo systemctl enable --now etcd`, index, machine.PrivateIP(), index, index, index, machine.PrivateIP(), cluster))
   210  	}
   211  
   212  	// Test the reported cluster health.
   213  	if err := rhcosClusterHealth(c); err != nil {
   214  		c.Fatalf("discovery failed cluster-health check: %v", err)
   215  	}
   216  
   217  	// Verify writing and reading keys over TLS.
   218  	c.MustSSH(machines[1], fmt.Sprintf("curl -sk https://%s:2379/v2/keys/kolavar -XPUT -d value=kolavalue", machines[1].PrivateIP()))
   219  	c.MustSSH(machines[2], fmt.Sprintf("curl -sk https://%s:2379/v2/keys/kolavar?quorum=true | grep -Fq kolavalue", machines[2].PrivateIP()))
   220  
   221  	// Test writing keys with curl over local HTTP.
   222  	var keyMap map[string]string
   223  	keyMap, err := setKeys(c, 5)
   224  	if err != nil {
   225  		c.Fatalf("failed to set keys: %v", err)
   226  	}
   227  
   228  	// Test reading keys with curl over local HTTP.
   229  	if err := checkKeys(c, keyMap); err != nil {
   230  		c.Fatalf("failed to check keys: %v", err)
   231  	}
   232  }
   233  
   234  // Generate the shared CA certificates and keys on a node, and copy them to the
   235  // other nodes in the cluster to sign their certs.  Yes, you would normally
   236  // copy the nodes' certificates to the system with the CA's private keys to
   237  // sign them, but we don't care about these temporary keys, and this results in
   238  // less file transfers between hosts.
   239  func rhcosClusterCreateCAFiles(c cluster.TestCluster) {
   240  	caNode := c.Machines()[0]
   241  
   242  	// Generate CA certificates on one node.
   243  	c.MustSSH(caNode, `set -e ; exec 2>&1
   244  sudo mkdir -p /etc/ssl/certs
   245  sudo openssl req -config /etc/ssl/etcd.cnf -x509 -nodes -newkey rsa:4096 -sha512 -days 3 -extensions etcd_ca -subj '/CN=etcd CA' -out /etc/ssl/certs/ca-etcd-cert.pem -keyout /etc/ssl/certs/ca-etcd-key.pem
   246  sudo openssl req -config /etc/ssl/etcd.cnf -x509 -nodes -newkey rsa:4096 -sha512 -days 3 -extensions etcd_ca -subj '/CN=peer CA' -out /etc/ssl/certs/ca-peer-cert.pem -keyout /etc/ssl/certs/ca-peer-key.pem`)
   247  
   248  	// Collect and compress the CA files to send to the other nodes.
   249  	tar, err := c.SSH(caNode, "sudo tar -C /etc/ssl/certs -cJ ca-{etcd,peer}-{cert,key}.pem")
   250  	if err != nil {
   251  		c.Fatalf("failed collecting CA files: %v", err)
   252  	}
   253  
   254  	for _, machine := range c.Machines() {
   255  		if machine == caNode {
   256  			continue
   257  		}
   258  
   259  		client, err := machine.SSHClient()
   260  		if err != nil {
   261  			c.Fatalf("failed creating SSH client: %v", err)
   262  		}
   263  		defer client.Close()
   264  
   265  		session, err := client.NewSession()
   266  		if err != nil {
   267  			c.Fatalf("failed creating SSH session: %v", err)
   268  		}
   269  		defer session.Close()
   270  
   271  		session.Stdin = bytes.NewReader(tar)
   272  		out, err := session.CombinedOutput("sudo mkdir -p /etc/ssl/certs && sudo tar -C /etc/ssl/certs -xJ")
   273  		if err != nil {
   274  			c.Fatalf("failed extracting CA files: %q: %v", out, err)
   275  		}
   276  	}
   277  }
   278  
   279  // This is basically GetClusterHealth, but it needs to enter the container to
   280  // call the etcdctl command.  It also specifies the etcd3 API.
   281  func rhcosClusterHealth(c cluster.TestCluster) error {
   282  	var b []byte
   283  	machine := c.Machines()[0]
   284  	csize := len(c.Machines())
   285  
   286  	checker := func() error {
   287  		b, err := c.SSH(machine, "sudo podman exec --env=ETCDCTL_API=3 etcd /usr/local/bin/etcdctl endpoint --cluster health 2>&1")
   288  		if err != nil {
   289  			return err
   290  		}
   291  
   292  		// The repsonse should include "healthy" for each machine.
   293  		if strings.Count(string(b), "is healthy") != csize {
   294  			return fmt.Errorf("unexpected etcdctl output")
   295  		}
   296  
   297  		plog.Infof("cluster healthy")
   298  		return nil
   299  	}
   300  
   301  	if err := util.Retry(15, 10*time.Second, checker); err != nil {
   302  		return fmt.Errorf("health polling failed: %v: %s", err, b)
   303  	}
   304  
   305  	return nil
   306  }