github.com/coreos/mantle@v0.13.0/kola/tests/misc/update.go (about)

     1  // Copyright 2017 CoreOS, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package misc
    16  
    17  import (
    18  	"fmt"
    19  	"regexp"
    20  	"time"
    21  
    22  	"golang.org/x/net/context"
    23  
    24  	"github.com/coreos/mantle/kola/cluster"
    25  	"github.com/coreos/mantle/kola/register"
    26  	"github.com/coreos/mantle/kola/tests/util"
    27  	"github.com/coreos/mantle/platform"
    28  	"github.com/coreos/mantle/platform/conf"
    29  )
    30  
var (
	// disableUpdateEngine is the Container Linux Config handed to every test
	// registered in this file as its UserData. It masks two systemd units,
	// update-engine.service and locksmithd.service.
	//
	// Masking update-engine prevents a race where update-engine sets the
	// boot partition back to USR-A after the test sets it to USR-B.
	disableUpdateEngine = conf.ContainerLinuxConfig(`systemd:
  units:
    - name: update-engine.service
      mask: true
    - name: locksmithd.service
      mask: true`)
)
    41  
    42  func init() {
    43  	register.Register(&register.Test{
    44  		Run:         RebootIntoUSRB,
    45  		ClusterSize: 1,
    46  		Name:        "cl.update.reboot",
    47  		UserData:    disableUpdateEngine,
    48  		Distros:     []string{"cl"},
    49  	})
    50  	register.Register(&register.Test{
    51  		Run:         RecoverBadVerity,
    52  		ClusterSize: 1,
    53  		Name:        "cl.update.badverity",
    54  		Flags:       []register.Flag{register.NoEmergencyShellCheck},
    55  		UserData:    disableUpdateEngine,
    56  		Distros:     []string{"cl"},
    57  	})
    58  	register.Register(&register.Test{
    59  		Run:         RecoverBadUsr,
    60  		ClusterSize: 1,
    61  		Name:        "cl.update.badusr",
    62  		Flags:       []register.Flag{register.NoEmergencyShellCheck},
    63  		UserData:    disableUpdateEngine,
    64  		Distros:     []string{"cl"},
    65  	})
    66  }
    67  
    68  // Simulate update scenarios
    69  
    70  // Check that we can reprioritize and boot into USR-B. This largely
    71  // validates the other tests in this file.
    72  func RebootIntoUSRB(c cluster.TestCluster) {
    73  	m := c.Machines()[0]
    74  
    75  	util.AssertBootedUsr(c, m, "USR-A")
    76  
    77  	// copy USR-A to USR-B
    78  	c.MustSSH(m, "sudo dd if=/dev/disk/by-partlabel/USR-A of=/dev/disk/by-partlabel/USR-B bs=10M status=none")
    79  
    80  	// copy kernel
    81  	c.MustSSH(m, "sudo cp /boot/coreos/vmlinuz-a /boot/coreos/vmlinuz-b")
    82  
    83  	prioritizeUsr(c, m, "USR-B")
    84  	if err := m.Reboot(); err != nil {
    85  		c.Fatalf("couldn't reboot: %v", err)
    86  	}
    87  	util.AssertBootedUsr(c, m, "USR-B")
    88  }
    89  
    90  // Verify that we reboot into the old image after the new image fails a
    91  // verity check.
    92  func RecoverBadVerity(c cluster.TestCluster) {
    93  	m := c.Machines()[0]
    94  
    95  	skipUnlessVerity(c, m)
    96  
    97  	util.AssertBootedUsr(c, m, "USR-A")
    98  
    99  	// copy USR-A to USR-B
   100  	c.MustSSH(m, "sudo dd if=/dev/disk/by-partlabel/USR-A of=/dev/disk/by-partlabel/USR-B bs=10M status=none")
   101  
   102  	// copy kernel
   103  	c.MustSSH(m, "sudo cp /boot/coreos/vmlinuz-a /boot/coreos/vmlinuz-b")
   104  
   105  	// invalidate verity hash on B kernel
   106  	c.MustSSH(m, "sudo dd of=/boot/coreos/vmlinuz-b bs=1 seek=64 count=64 conv=notrunc status=none <<<0000000000000000000000000000000000000000000000000000000000000000")
   107  
   108  	prioritizeUsr(c, m, "USR-B")
   109  	rebootWithEmergencyShellTimeout(c, m)
   110  	util.AssertBootedUsr(c, m, "USR-A")
   111  }
   112  
   113  // Verify that we reboot into the old image when the new image is an
   114  // unreasonable filesystem (an empty one) that passes verity.
   115  func RecoverBadUsr(c cluster.TestCluster) {
   116  	m := c.Machines()[0]
   117  
   118  	util.AssertBootedUsr(c, m, "USR-A")
   119  
   120  	// create filesystem for USR-B
   121  	c.MustSSH(m, "sudo mkfs.ext4 -q -b 4096 /dev/disk/by-partlabel/USR-B 25600")
   122  
   123  	// create verity metadata for USR-B
   124  	output := c.MustSSH(m, "sudo veritysetup format --hash=sha256 "+
   125  		"--data-block-size 4096 --hash-block-size 4096 --data-blocks 25600 --hash-offset 104857600 "+
   126  		"/dev/disk/by-partlabel/USR-B /dev/disk/by-partlabel/USR-B")
   127  
   128  	// extract root hash for USR-B
   129  	match := regexp.MustCompile("\nRoot hash:\\s+([0-9a-f]+)").FindSubmatch(output)
   130  	if match == nil {
   131  		c.Fatalf("Couldn't obtain new root hash; output %s", output)
   132  	}
   133  	verityHash := match[1]
   134  
   135  	// copy kernel
   136  	c.MustSSH(m, "sudo cp /boot/coreos/vmlinuz-a /boot/coreos/vmlinuz-b")
   137  
   138  	// update verity hash on B kernel
   139  	c.MustSSH(m, fmt.Sprintf("sudo dd of=/boot/coreos/vmlinuz-b bs=1 seek=64 count=64 conv=notrunc status=none <<<%s", verityHash))
   140  
   141  	prioritizeUsr(c, m, "USR-B")
   142  	rebootWithEmergencyShellTimeout(c, m)
   143  	util.AssertBootedUsr(c, m, "USR-A")
   144  }
   145  
   146  func prioritizeUsr(c cluster.TestCluster, m platform.Machine, usr string) {
   147  	c.MustSSH(m, "sudo cgpt repair /dev/disk/by-partlabel/"+usr)
   148  	c.MustSSH(m, "sudo cgpt add -S0 -T1 /dev/disk/by-partlabel/"+usr)
   149  	c.MustSSH(m, "sudo cgpt prioritize /dev/disk/by-partlabel/"+usr)
   150  }
   151  
   152  // reboot, waiting extra-long for the 5-minute emergency shell timeout
   153  func rebootWithEmergencyShellTimeout(c cluster.TestCluster, m platform.Machine) {
   154  	// reboot; wait extra 5 minutes; check machine
   155  	// this defeats some of the machinery in m.Reboot()
   156  	if err := platform.StartReboot(m); err != nil {
   157  		c.Fatal(err)
   158  	}
   159  	time.Sleep(5 * time.Minute)
   160  	if err := platform.CheckMachine(context.TODO(), m); err != nil {
   161  		c.Fatal(err)
   162  	}
   163  }