gitee.com/mysnapcore/mysnapd@v0.1.0/cmd/snap-failure/cmd_snapd.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2018 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package main 21 22 import ( 23 "encoding/json" 24 "errors" 25 "fmt" 26 "io/ioutil" 27 "os" 28 "os/exec" 29 "path/filepath" 30 "time" 31 32 "gitee.com/mysnapcore/mysnapd/dirs" 33 "gitee.com/mysnapcore/mysnapd/logger" 34 "gitee.com/mysnapcore/mysnapd/osutil" 35 ) 36 37 func init() { 38 const ( 39 short = "Run snapd failure handling" 40 long = "" 41 ) 42 43 if _, err := parser.AddCommand("snapd", short, long, &cmdSnapd{}); err != nil { 44 panic(err) 45 } 46 47 } 48 49 // We do not import anything from snapd here for safety reasons so make a 50 // copy of the relevant struct data we care about. 51 type sideInfo struct { 52 Revision string `json:"revision"` 53 } 54 55 type snapSeq struct { 56 Current string `json:"current"` 57 Sequence []sideInfo `json:"sequence"` 58 } 59 60 type cmdSnapd struct{} 61 62 var errNoSnapd = errors.New("no snapd sequence file found") 63 var errNoPrevious = errors.New("no revision to go back to") 64 65 func prevRevision(snapName string) (string, error) { 66 seqFile := filepath.Join(dirs.SnapSeqDir, snapName+".json") 67 content, err := ioutil.ReadFile(seqFile) 68 if os.IsNotExist(err) { 69 return "", errNoSnapd 70 } 71 if err != nil { 72 return "", err 73 } 74 75 var seq snapSeq 76 if err := json.Unmarshal(content, &seq); err != nil { 77 return "", fmt.Errorf("cannot parse %q sequence file: %v", filepath.Base(seqFile), err) 78 } 79 80 var prev string 81 for i, si := range seq.Sequence { 82 if seq.Current == si.Revision { 83 if i == 0 { 84 return "", errNoPrevious 85 } 86 prev = seq.Sequence[i-1].Revision 87 break 88 } 89 } 90 if prev == "" { 91 return "", fmt.Errorf("internal error: current %v not found in sequence: %+v", seq.Current, seq.Sequence) 92 } 93 94 return prev, nil 95 } 96 97 func runCmd(prog string, args []string, env []string) *exec.Cmd { 98 cmd := exec.Command(prog, args...) 99 cmd.Env = os.Environ() 100 for _, envVar := range env { 101 cmd.Env = append(cmd.Env, envVar) 102 } 103 104 cmd.Stdout = Stdout 105 cmd.Stderr = Stderr 106 107 return cmd 108 } 109 110 var ( 111 sampleForActiveInterval = 5 * time.Second 112 restartSnapdCoolOffWait = 12500 * time.Millisecond 113 ) 114 115 // FIXME: also do error reporting via errtracker 116 func (c *cmdSnapd) Execute(args []string) error { 117 var snapdPath string 118 // find previous the snapd snap 119 prevRev, err := prevRevision("snapd") 120 switch err { 121 case errNoSnapd: 122 // the snapd snap is not installed 123 return nil 124 case errNoPrevious: 125 // this is the first revision of snapd to be installed on the 126 // system, either a remodel or a plain snapd installation, call 127 // the snapd from the core snap 128 snapdPath = filepath.Join(dirs.SnapMountDir, "core", "current", "/usr/lib/snapd/snapd") 129 if !osutil.FileExists(snapdPath) { 130 // it is possible that the core snap is not installed at 131 // all, in which case we should try the snapd snap 132 snapdPath = filepath.Join(dirs.SnapMountDir, "snapd", "current", "/usr/lib/snapd/snapd") 133 } 134 prevRev = "0" 135 case nil: 136 // the snapd snap was installed before, use the previous revision 137 snapdPath = filepath.Join(dirs.SnapMountDir, "snapd", prevRev, "/usr/lib/snapd/snapd") 138 default: 139 return err 140 } 141 logger.Noticef("stopping snapd socket") 142 // stop the socket unit so that we can start snapd on its own 143 output, err := exec.Command("systemctl", "stop", "snapd.socket").CombinedOutput() 144 if err != nil { 145 return osutil.OutputErr(output, err) 146 } 147 148 logger.Noticef("restoring invoking snapd from: %v", snapdPath) 149 // start previous snapd 150 cmd := runCmd(snapdPath, nil, []string{"SNAPD_REVERT_TO_REV=" + prevRev, "SNAPD_DEBUG=1"}) 151 if err = cmd.Run(); err != nil { 152 return fmt.Errorf("snapd failed: %v", err) 153 } 154 155 isFailedCmd := runCmd("systemctl", []string{"is-failed", "snapd.socket", "snapd.service"}, nil) 156 if err := isFailedCmd.Run(); err != nil { 157 // the ephemeral snapd we invoked seems to have fixed 158 // snapd.service and snapd.socket, check whether they get 159 // reported as active for 5 * 5s 160 for i := 0; i < 5; i++ { 161 if i != 0 { 162 time.Sleep(sampleForActiveInterval) 163 } 164 isActiveCmd := runCmd("systemctl", []string{"is-active", "snapd.socket", "snapd.service"}, nil) 165 err := isActiveCmd.Run() 166 if err == nil && osutil.FileExists(dirs.SnapdSocket) && osutil.FileExists(dirs.SnapSocket) { 167 logger.Noticef("snapd is active again, sockets are available, nothing more to do") 168 return nil 169 } 170 } 171 } 172 173 logger.Noticef("restarting snapd socket") 174 // we need to reset the failure state to be able to restart again 175 resetCmd := runCmd("systemctl", []string{"reset-failed", "snapd.socket", "snapd.service"}, nil) 176 if err = resetCmd.Run(); err != nil { 177 // don't die if we fail to reset the failed state of snapd.socket, as 178 // the restart itself could still work 179 logger.Noticef("failed to reset-failed snapd.socket: %v", err) 180 } 181 // at this point our manually started snapd stopped and 182 // should have removed the /run/snap* sockets (this is a feature of 183 // golang) - we need to restart snapd.socket to make them 184 // available again. 185 186 // be extra robust and if the socket file still somehow exists delete it 187 // before restarting, otherwise the restart command will fail because the 188 // systemd can't create the file 189 // always remove to avoid TOCTOU issues but don't complain about ENOENT 190 for _, fn := range []string{dirs.SnapdSocket, dirs.SnapSocket} { 191 err = os.Remove(fn) 192 if err != nil && !os.IsNotExist(err) { 193 logger.Noticef("snapd socket %s still exists before restarting socket service, but unable to remove: %v", fn, err) 194 } 195 } 196 197 restartCmd := runCmd("systemctl", []string{"restart", "snapd.socket"}, nil) 198 if err := restartCmd.Run(); err != nil { 199 logger.Noticef("failed to restart snapd.socket: %v", err) 200 // fallback to try snapd itself 201 // wait more than DefaultStartLimitIntervalSec 202 // 203 // TODO: consider parsing 204 // systemctl show snapd -p StartLimitIntervalUSec 205 // might need system-analyze timespan which is relatively new 206 // for the general case 207 time.Sleep(restartSnapdCoolOffWait) 208 logger.Noticef("fallback, restarting snapd itself") 209 restartCmd := runCmd("systemctl", []string{"restart", "snapd.service"}, nil) 210 if err := restartCmd.Run(); err != nil { 211 logger.Noticef("failed to restart snapd: %v", err) 212 } 213 } 214 215 return nil 216 }