github.com/tompreston/snapd@v0.0.0-20210817193607-954edfcb9611/cmd/snap-failure/cmd_snapd.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2018 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package main
    21  
    22  import (
    23  	"encoding/json"
    24  	"errors"
    25  	"fmt"
    26  	"io/ioutil"
    27  	"os"
    28  	"os/exec"
    29  	"path/filepath"
    30  	"time"
    31  
    32  	"github.com/snapcore/snapd/dirs"
    33  	"github.com/snapcore/snapd/logger"
    34  	"github.com/snapcore/snapd/osutil"
    35  )
    36  
    37  func init() {
    38  	const (
    39  		short = "Run snapd failure handling"
    40  		long  = ""
    41  	)
    42  
    43  	if _, err := parser.AddCommand("snapd", short, long, &cmdSnapd{}); err != nil {
    44  		panic(err)
    45  	}
    46  
    47  }
    48  
    49  // We do not import anything from snapd here for safety reasons so make a
    50  // copy of the relevant struct data we care about.
    51  type sideInfo struct {
    52  	Revision string `json:"revision"`
    53  }
    54  
    55  type snapSeq struct {
    56  	Current  string     `json:"current"`
    57  	Sequence []sideInfo `json:"sequence"`
    58  }
    59  
    60  type cmdSnapd struct{}
    61  
    62  var errNoSnapd = errors.New("no snapd sequence file found")
    63  var errNoPrevious = errors.New("no revision to go back to")
    64  
    65  func prevRevision(snapName string) (string, error) {
    66  	seqFile := filepath.Join(dirs.SnapSeqDir, snapName+".json")
    67  	content, err := ioutil.ReadFile(seqFile)
    68  	if os.IsNotExist(err) {
    69  		return "", errNoSnapd
    70  	}
    71  	if err != nil {
    72  		return "", err
    73  	}
    74  
    75  	var seq snapSeq
    76  	if err := json.Unmarshal(content, &seq); err != nil {
    77  		return "", fmt.Errorf("cannot parse %q sequence file: %v", filepath.Base(seqFile), err)
    78  	}
    79  
    80  	var prev string
    81  	for i, si := range seq.Sequence {
    82  		if seq.Current == si.Revision {
    83  			if i == 0 {
    84  				return "", errNoPrevious
    85  			}
    86  			prev = seq.Sequence[i-1].Revision
    87  			break
    88  		}
    89  	}
    90  	if prev == "" {
    91  		return "", fmt.Errorf("internal error: current %v not found in sequence: %+v", seq.Current, seq.Sequence)
    92  	}
    93  
    94  	return prev, nil
    95  }
    96  
    97  func runCmd(prog string, args []string, env []string) *exec.Cmd {
    98  	cmd := exec.Command(prog, args...)
    99  	cmd.Env = os.Environ()
   100  	for _, envVar := range env {
   101  		cmd.Env = append(cmd.Env, envVar)
   102  	}
   103  
   104  	cmd.Stdout = Stdout
   105  	cmd.Stderr = Stderr
   106  
   107  	return cmd
   108  }
   109  
var (
	// sampleForActiveInterval is how long Execute sleeps between two
	// consecutive "systemctl is-active" checks of the snapd units.
	sampleForActiveInterval = 5 * time.Second
	// restartSnapdCoolOffWait is how long Execute waits after a failed
	// restart of snapd.socket before falling back to restarting
	// snapd.service.
	restartSnapdCoolOffWait = 12500 * time.Millisecond
)
   114  
   115  // FIXME: also do error reporting via errtracker
   116  func (c *cmdSnapd) Execute(args []string) error {
   117  	var snapdPath string
   118  	// find previous the snapd snap
   119  	prevRev, err := prevRevision("snapd")
   120  	switch err {
   121  	case errNoSnapd:
   122  		// the snapd snap is not installed
   123  		return nil
   124  	case errNoPrevious:
   125  		// this is the first revision of snapd to be installed on the
   126  		// system, either a remodel or a plain snapd installation, call
   127  		// the snapd from the core snap
   128  		snapdPath = filepath.Join(dirs.SnapMountDir, "core", "current", "/usr/lib/snapd/snapd")
   129  		prevRev = "0"
   130  	case nil:
   131  		// the snapd snap was installed before, use the previous revision
   132  		snapdPath = filepath.Join(dirs.SnapMountDir, "snapd", prevRev, "/usr/lib/snapd/snapd")
   133  	default:
   134  		return err
   135  	}
   136  	logger.Noticef("stopping snapd socket")
   137  	// stop the socket unit so that we can start snapd on its own
   138  	output, err := exec.Command("systemctl", "stop", "snapd.socket").CombinedOutput()
   139  	if err != nil {
   140  		return osutil.OutputErr(output, err)
   141  	}
   142  
   143  	logger.Noticef("restoring invoking snapd from: %v", snapdPath)
   144  	// start previous snapd
   145  	cmd := runCmd(snapdPath, nil, []string{"SNAPD_REVERT_TO_REV=" + prevRev, "SNAPD_DEBUG=1"})
   146  	if err = cmd.Run(); err != nil {
   147  		return fmt.Errorf("snapd failed: %v", err)
   148  	}
   149  
   150  	isFailedCmd := runCmd("systemctl", []string{"is-failed", "snapd.socket", "snapd.service"}, nil)
   151  	if err := isFailedCmd.Run(); err != nil {
   152  		// the ephemeral snapd we invoked seems to have fixed
   153  		// snapd.service and snapd.socket, check whether they get
   154  		// reported as active for 5 * 5s
   155  		for i := 0; i < 5; i++ {
   156  			if i != 0 {
   157  				time.Sleep(sampleForActiveInterval)
   158  			}
   159  			isActiveCmd := runCmd("systemctl", []string{"is-active", "snapd.socket", "snapd.service"}, nil)
   160  			err := isActiveCmd.Run()
   161  			if err == nil && osutil.FileExists(dirs.SnapdSocket) && osutil.FileExists(dirs.SnapSocket) {
   162  				logger.Noticef("snapd is active again, sockets are available, nothing more to do")
   163  				return nil
   164  			}
   165  		}
   166  	}
   167  
   168  	logger.Noticef("restarting snapd socket")
   169  	// we need to reset the failure state to be able to restart again
   170  	resetCmd := runCmd("systemctl", []string{"reset-failed", "snapd.socket", "snapd.service"}, nil)
   171  	if err = resetCmd.Run(); err != nil {
   172  		// don't die if we fail to reset the failed state of snapd.socket, as
   173  		// the restart itself could still work
   174  		logger.Noticef("failed to reset-failed snapd.socket: %v", err)
   175  	}
   176  	// at this point our manually started snapd stopped and
   177  	// should have removed the /run/snap* sockets (this is a feature of
   178  	// golang) - we need to restart snapd.socket to make them
   179  	// available again.
   180  
   181  	// be extra robust and if the socket file still somehow exists delete it
   182  	// before restarting, otherwise the restart command will fail because the
   183  	// systemd can't create the file
   184  	// always remove to avoid TOCTOU issues but don't complain about ENOENT
   185  	for _, fn := range []string{dirs.SnapdSocket, dirs.SnapSocket} {
   186  		err = os.Remove(fn)
   187  		if err != nil && !os.IsNotExist(err) {
   188  			logger.Noticef("snapd socket %s still exists before restarting socket service, but unable to remove: %v", fn, err)
   189  		}
   190  	}
   191  
   192  	restartCmd := runCmd("systemctl", []string{"restart", "snapd.socket"}, nil)
   193  	if err := restartCmd.Run(); err != nil {
   194  		logger.Noticef("failed to restart snapd.socket: %v", err)
   195  		// fallback to try snapd itself
   196  		// wait more than DefaultStartLimitIntervalSec
   197  		//
   198  		// TODO: consider parsing
   199  		// systemctl show snapd -p StartLimitIntervalUSec
   200  		// might need system-analyze timespan which is relatively new
   201  		// for the general case
   202  		time.Sleep(restartSnapdCoolOffWait)
   203  		logger.Noticef("fallback, restarting snapd itself")
   204  		restartCmd := runCmd("systemctl", []string{"restart", "snapd.service"}, nil)
   205  		if err := restartCmd.Run(); err != nil {
   206  			logger.Noticef("failed to restart snapd: %v", err)
   207  		}
   208  	}
   209  
   210  	return nil
   211  }