github.com/hugh712/snapd@v0.0.0-20200910133618-1a99902bd583/cmd/snap-failure/cmd_snapd.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2018 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package main 21 22 import ( 23 "encoding/json" 24 "errors" 25 "fmt" 26 "io/ioutil" 27 "os" 28 "os/exec" 29 "path/filepath" 30 "time" 31 32 "github.com/snapcore/snapd/dirs" 33 "github.com/snapcore/snapd/logger" 34 "github.com/snapcore/snapd/osutil" 35 ) 36 37 func init() { 38 const ( 39 short = "Run snapd failure handling" 40 long = "" 41 ) 42 43 if _, err := parser.AddCommand("snapd", short, long, &cmdSnapd{}); err != nil { 44 panic(err) 45 } 46 47 } 48 49 // We do not import anything from snapd here for safety reasons so make a 50 // copy of the relevant struct data we care about. 51 type sideInfo struct { 52 Revision string `json:"revision"` 53 } 54 55 type snapSeq struct { 56 Current string `json:"current"` 57 Sequence []sideInfo `json:"sequence"` 58 } 59 60 type cmdSnapd struct{} 61 62 var errNoSnapd = errors.New("no snapd sequence file found") 63 var errNoPrevious = errors.New("no revision to go back to") 64 65 func prevRevision(snapName string) (string, error) { 66 seqFile := filepath.Join(dirs.SnapSeqDir, snapName+".json") 67 content, err := ioutil.ReadFile(seqFile) 68 if os.IsNotExist(err) { 69 return "", errNoSnapd 70 } 71 if err != nil { 72 return "", err 73 } 74 75 var seq snapSeq 76 if err := json.Unmarshal(content, &seq); err != nil { 77 return "", fmt.Errorf("cannot parse %q sequence file: %v", filepath.Base(seqFile), err) 78 } 79 80 var prev string 81 for i, si := range seq.Sequence { 82 if seq.Current == si.Revision { 83 if i == 0 { 84 return "", errNoPrevious 85 } 86 prev = seq.Sequence[i-1].Revision 87 break 88 } 89 } 90 if prev == "" { 91 return "", fmt.Errorf("internal error: current %v not found in sequence: %+v", seq.Current, seq.Sequence) 92 } 93 94 return prev, nil 95 } 96 97 func runCmd(prog string, args []string, env []string) *exec.Cmd { 98 cmd := exec.Command(prog, args...) 99 cmd.Env = os.Environ() 100 for _, envVar := range env { 101 cmd.Env = append(cmd.Env, envVar) 102 } 103 104 cmd.Stdout = Stdout 105 cmd.Stderr = Stderr 106 107 return cmd 108 } 109 110 var ( 111 sampleForActiveInterval = 5 * time.Second 112 restartSnapdCoolOffWait = 12500 * time.Millisecond 113 ) 114 115 // FIXME: also do error reporting via errtracker 116 func (c *cmdSnapd) Execute(args []string) error { 117 var snapdPath string 118 // find previous the snapd snap 119 prevRev, err := prevRevision("snapd") 120 switch err { 121 case errNoSnapd: 122 // the snapd snap is not installed 123 return nil 124 case errNoPrevious: 125 // this is the first revision of snapd to be installed on the 126 // system, either a remodel or a plain snapd installation, call 127 // the snapd from the core snap 128 snapdPath = filepath.Join(dirs.SnapMountDir, "core", "current", "/usr/lib/snapd/snapd") 129 prevRev = "0" 130 case nil: 131 // the snapd snap was installed before, use the previous revision 132 snapdPath = filepath.Join(dirs.SnapMountDir, "snapd", prevRev, "/usr/lib/snapd/snapd") 133 default: 134 return err 135 } 136 logger.Noticef("stopping snapd socket") 137 // stop the socket unit so that we can start snapd on its own 138 output, err := exec.Command("systemctl", "stop", "snapd.socket").CombinedOutput() 139 if err != nil { 140 return osutil.OutputErr(output, err) 141 } 142 143 logger.Noticef("restoring invoking snapd from: %v", snapdPath) 144 // start previous snapd 145 cmd := runCmd(snapdPath, nil, []string{"SNAPD_REVERT_TO_REV=" + prevRev, "SNAPD_DEBUG=1"}) 146 if err = cmd.Run(); err != nil { 147 return fmt.Errorf("snapd failed: %v", err) 148 } 149 150 isFailedCmd := runCmd("systemctl", []string{"is-failed", "snapd.socket", "snapd.service"}, nil) 151 if err := isFailedCmd.Run(); err != nil { 152 // the ephemeral snapd we invoked seems to have fixed 153 // snapd.service and snapd.socket, check whether they get 154 // reported as active for 5 * 5s 155 for i := 0; i < 5; i++ { 156 if i != 0 { 157 time.Sleep(sampleForActiveInterval) 158 } 159 isActiveCmd := runCmd("systemctl", []string{"is-active", "snapd.socket", "snapd.service"}, nil) 160 err := isActiveCmd.Run() 161 if err == nil && osutil.FileExists(dirs.SnapdSocket) && osutil.FileExists(dirs.SnapSocket) { 162 logger.Noticef("snapd is active again, sockets are available, nothing more to do") 163 return nil 164 } 165 } 166 } 167 168 logger.Noticef("restarting snapd socket") 169 // we need to reset the failure state to be able to restart again 170 resetCmd := runCmd("systemctl", []string{"reset-failed", "snapd.socket", "snapd.service"}, nil) 171 if err = resetCmd.Run(); err != nil { 172 // don't die if we fail to reset the failed state of snapd.socket, as 173 // the restart itself could still work 174 logger.Noticef("failed to reset-failed snapd.socket: %v", err) 175 } 176 // at this point our manually started snapd stopped and 177 // should have removed the /run/snap* sockets (this is a feature of 178 // golang) - we need to restart snapd.socket to make them 179 // available again. 180 181 // be extra robust and if the socket file still somehow exists delete it 182 // before restarting, otherwise the restart command will fail because the 183 // systemd can't create the file 184 // always remove to avoid TOCTOU issues but don't complain about ENOENT 185 for _, fn := range []string{dirs.SnapdSocket, dirs.SnapSocket} { 186 err = os.Remove(fn) 187 if err != nil && !os.IsNotExist(err) { 188 logger.Noticef("snapd socket %s still exists before restarting socket service, but unable to remove: %v", fn, err) 189 } 190 } 191 192 restartCmd := runCmd("systemctl", []string{"restart", "snapd.socket"}, nil) 193 if err := restartCmd.Run(); err != nil { 194 logger.Noticef("failed to restart snapd.socket: %v", err) 195 // fallback to try snapd itself 196 // wait more than DefaultStartLimitIntervalSec 197 // 198 // TODO: consider parsing 199 // systemctl show snapd -p StartLimitIntervalUSec 200 // might need system-analyze timespan which is relatively new 201 // for the general case 202 time.Sleep(restartSnapdCoolOffWait) 203 logger.Noticef("fallback, restarting snapd itself") 204 restartCmd := runCmd("systemctl", []string{"restart", "snapd.service"}, nil) 205 if err := restartCmd.Run(); err != nil { 206 logger.Noticef("failed to restart snapd: %v", err) 207 } 208 } 209 210 return nil 211 }