github.com/hugh712/snapd@v0.0.0-20200910133618-1a99902bd583/daemon/daemon.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2015-2020 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package daemon 21 22 import ( 23 "context" 24 "fmt" 25 "net" 26 "net/http" 27 "os" 28 "os/exec" 29 "os/signal" 30 "strconv" 31 "strings" 32 "sync" 33 "time" 34 35 "github.com/gorilla/mux" 36 "gopkg.in/tomb.v2" 37 38 "github.com/snapcore/snapd/client" 39 "github.com/snapcore/snapd/dirs" 40 "github.com/snapcore/snapd/i18n" 41 "github.com/snapcore/snapd/logger" 42 "github.com/snapcore/snapd/netutil" 43 "github.com/snapcore/snapd/osutil" 44 "github.com/snapcore/snapd/overlord" 45 "github.com/snapcore/snapd/overlord/auth" 46 "github.com/snapcore/snapd/overlord/standby" 47 "github.com/snapcore/snapd/overlord/state" 48 "github.com/snapcore/snapd/polkit" 49 "github.com/snapcore/snapd/snapdenv" 50 "github.com/snapcore/snapd/store" 51 "github.com/snapcore/snapd/systemd" 52 ) 53 54 var ErrRestartSocket = fmt.Errorf("daemon stop requested to wait for socket activation") 55 56 var systemdSdNotify = systemd.SdNotify 57 58 // A Daemon listens for requests and routes them to the right command 59 type Daemon struct { 60 Version string 61 overlord *overlord.Overlord 62 state *state.State 63 snapdListener net.Listener 64 snapListener net.Listener 65 connTracker *connTracker 66 serve *http.Server 67 tomb tomb.Tomb 68 router *mux.Router 69 standbyOpinions *standby.StandbyOpinions 70 71 // set to remember we need to restart the system 72 restartSystem state.RestartType 73 // set to remember that we need to exit the daemon in a way that 74 // prevents systemd from restarting it 75 restartSocket bool 76 // degradedErr is set when the daemon is in degraded mode 77 degradedErr error 78 79 expectedRebootDidNotHappen bool 80 81 mu sync.Mutex 82 } 83 84 // A ResponseFunc handles one of the individual verbs for a method 85 type ResponseFunc func(*Command, *http.Request, *auth.UserState) Response 86 87 // A Command routes a request to an individual per-verb ResponseFUnc 88 type Command struct { 89 Path string 90 PathPrefix string 91 // 92 GET ResponseFunc 93 PUT ResponseFunc 94 POST ResponseFunc 95 // can guest GET? 96 GuestOK bool 97 // can non-admin GET? 98 UserOK bool 99 // is this path accessible on the snapd-snap socket? 100 SnapOK bool 101 // this path is only accessible to root 102 RootOnly bool 103 104 // can polkit grant access? set to polkit action ID if so 105 PolkitOK string 106 107 d *Daemon 108 } 109 110 type accessResult int 111 112 const ( 113 accessOK accessResult = iota 114 accessUnauthorized 115 accessForbidden 116 accessCancelled 117 ) 118 119 var polkitCheckAuthorization = polkit.CheckAuthorization 120 121 // canAccess checks the following properties: 122 // 123 // - if the user is `root` everything is allowed 124 // - if a user is logged in (via `snap login`) and the command doesn't have RootOnly, everything is allowed 125 // - POST/PUT all require `root`, or just `snap login` if not RootOnly 126 // 127 // Otherwise for GET requests the following parameters are honored: 128 // - GuestOK: anyone can access GET 129 // - UserOK: any uid on the local system can access GET 130 // - RootOnly: only root can access this 131 // - SnapOK: a snap can access this via `snapctl` 132 func (c *Command) canAccess(r *http.Request, user *auth.UserState) accessResult { 133 if c.RootOnly && (c.UserOK || c.GuestOK || c.SnapOK) { 134 // programming error 135 logger.Panicf("Command can't have RootOnly together with any *OK flag") 136 } 137 138 if user != nil && !c.RootOnly { 139 // Authenticated users do anything not requiring explicit root. 140 return accessOK 141 } 142 143 // isUser means we have a UID for the request 144 isUser := false 145 pid, uid, socket, err := ucrednetGet(r.RemoteAddr) 146 if err == nil { 147 isUser = true 148 } else if err != errNoID { 149 logger.Noticef("unexpected error when attempting to get UID: %s", err) 150 return accessForbidden 151 } 152 isSnap := (socket == dirs.SnapSocket) 153 154 // ensure that snaps can only access SnapOK things 155 if isSnap { 156 if c.SnapOK { 157 return accessOK 158 } 159 return accessUnauthorized 160 } 161 162 // the !RootOnly check is redundant, but belt-and-suspenders 163 if r.Method == "GET" && !c.RootOnly { 164 // Guest and user access restricted to GET requests 165 if c.GuestOK { 166 return accessOK 167 } 168 169 if isUser && c.UserOK { 170 return accessOK 171 } 172 } 173 174 // Remaining admin checks rely on identifying peer uid 175 if !isUser { 176 return accessUnauthorized 177 } 178 179 if uid == 0 { 180 // Superuser does anything. 181 return accessOK 182 } 183 184 if c.RootOnly { 185 return accessUnauthorized 186 } 187 188 if c.PolkitOK != "" { 189 var flags polkit.CheckFlags 190 allowHeader := r.Header.Get(client.AllowInteractionHeader) 191 if allowHeader != "" { 192 if allow, err := strconv.ParseBool(allowHeader); err != nil { 193 logger.Noticef("error parsing %s header: %s", client.AllowInteractionHeader, err) 194 } else if allow { 195 flags |= polkit.CheckAllowInteraction 196 } 197 } 198 // Pass both pid and uid from the peer ucred to avoid pid race 199 if authorized, err := polkitCheckAuthorization(pid, uid, c.PolkitOK, nil, flags); err == nil { 200 if authorized { 201 // polkit says user is authorised 202 return accessOK 203 } 204 } else if err == polkit.ErrDismissed { 205 return accessCancelled 206 } else { 207 logger.Noticef("polkit error: %s", err) 208 } 209 } 210 211 return accessUnauthorized 212 } 213 214 func (c *Command) ServeHTTP(w http.ResponseWriter, r *http.Request) { 215 st := c.d.state 216 st.Lock() 217 // TODO Look at the error and fail if there's an attempt to authenticate with invalid data. 218 user, _ := UserFromRequest(st, r) 219 st.Unlock() 220 221 // check if we are in degradedMode 222 if c.d.degradedErr != nil && r.Method != "GET" { 223 InternalError(c.d.degradedErr.Error()).ServeHTTP(w, r) 224 return 225 } 226 227 switch c.canAccess(r, user) { 228 case accessOK: 229 // nothing 230 case accessUnauthorized: 231 Unauthorized("access denied").ServeHTTP(w, r) 232 return 233 case accessForbidden: 234 Forbidden("forbidden").ServeHTTP(w, r) 235 return 236 case accessCancelled: 237 AuthCancelled("cancelled").ServeHTTP(w, r) 238 return 239 } 240 241 ctx := store.WithClientUserAgent(r.Context(), r) 242 r = r.WithContext(ctx) 243 244 var rspf ResponseFunc 245 var rsp = MethodNotAllowed("method %q not allowed", r.Method) 246 247 switch r.Method { 248 case "GET": 249 rspf = c.GET 250 case "PUT": 251 rspf = c.PUT 252 case "POST": 253 rspf = c.POST 254 } 255 256 if rspf != nil { 257 rsp = rspf(c, r, user) 258 } 259 260 if rsp, ok := rsp.(*resp); ok { 261 _, rst := st.Restarting() 262 switch rst { 263 case state.RestartSystem, state.RestartSystemNow: 264 rsp.transmitMaintenance(client.ErrorKindSystemRestart, "system is restarting") 265 case state.RestartDaemon: 266 rsp.transmitMaintenance(client.ErrorKindDaemonRestart, "daemon is restarting") 267 case state.RestartSocket: 268 rsp.transmitMaintenance(client.ErrorKindDaemonRestart, "daemon is stopping to wait for socket activation") 269 } 270 if rsp.Type != ResponseTypeError { 271 st.Lock() 272 count, stamp := st.WarningsSummary() 273 st.Unlock() 274 rsp.addWarningsToMeta(count, stamp) 275 } 276 } 277 278 rsp.ServeHTTP(w, r) 279 } 280 281 type wrappedWriter struct { 282 w http.ResponseWriter 283 s int 284 } 285 286 func (w *wrappedWriter) Header() http.Header { 287 return w.w.Header() 288 } 289 290 func (w *wrappedWriter) Write(bs []byte) (int, error) { 291 return w.w.Write(bs) 292 } 293 294 func (w *wrappedWriter) WriteHeader(s int) { 295 w.w.WriteHeader(s) 296 w.s = s 297 } 298 299 func (w *wrappedWriter) Flush() { 300 if f, ok := w.w.(http.Flusher); ok { 301 f.Flush() 302 } 303 } 304 305 func logit(handler http.Handler) http.Handler { 306 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 307 ww := &wrappedWriter{w: w} 308 t0 := time.Now() 309 handler.ServeHTTP(ww, r) 310 t := time.Now().Sub(t0) 311 url := r.URL.String() 312 if !strings.Contains(url, "/changes/") { 313 logger.Debugf("%s %s %s %s %d", r.RemoteAddr, r.Method, r.URL, t, ww.s) 314 } 315 }) 316 } 317 318 // Init sets up the Daemon's internal workings. 319 // Don't call more than once. 320 func (d *Daemon) Init() error { 321 listenerMap, err := netutil.ActivationListeners() 322 if err != nil { 323 return err 324 } 325 326 // The SnapdSocket is required-- without it, die. 327 if listener, err := netutil.GetListener(dirs.SnapdSocket, listenerMap); err == nil { 328 d.snapdListener = &ucrednetListener{Listener: listener} 329 } else { 330 return fmt.Errorf("when trying to listen on %s: %v", dirs.SnapdSocket, err) 331 } 332 333 if listener, err := netutil.GetListener(dirs.SnapSocket, listenerMap); err == nil { 334 // This listener may also be nil if that socket wasn't among 335 // the listeners, so check it before using it. 336 d.snapListener = &ucrednetListener{Listener: listener} 337 } else { 338 logger.Debugf("cannot get listener for %q: %v", dirs.SnapSocket, err) 339 } 340 341 d.addRoutes() 342 343 logger.Noticef("started %v.", snapdenv.UserAgent()) 344 345 return nil 346 } 347 348 // SetDegradedMode puts the daemon into an degraded mode which will the 349 // error given in the "err" argument for commands that are not marked 350 // as readonlyOK. 351 // 352 // This is useful to report errors to the client when the daemon 353 // cannot work because e.g. a sanity check failed or the system is out 354 // of diskspace. 355 // 356 // When the system is fine again calling "DegradedMode(nil)" is enough 357 // to put the daemon into full operation again. 358 func (d *Daemon) SetDegradedMode(err error) { 359 d.degradedErr = err 360 } 361 362 func (d *Daemon) addRoutes() { 363 d.router = mux.NewRouter() 364 365 for _, c := range api { 366 c.d = d 367 if c.PathPrefix == "" { 368 d.router.Handle(c.Path, c).Name(c.Path) 369 } else { 370 d.router.PathPrefix(c.PathPrefix).Handler(c).Name(c.PathPrefix) 371 } 372 } 373 374 // also maybe add a /favicon.ico handler... 375 376 d.router.NotFoundHandler = NotFound("not found") 377 } 378 379 var ( 380 shutdownTimeout = 25 * time.Second 381 ) 382 383 type connTracker struct { 384 mu sync.Mutex 385 conns map[net.Conn]struct{} 386 } 387 388 func (ct *connTracker) CanStandby() bool { 389 ct.mu.Lock() 390 defer ct.mu.Unlock() 391 392 return len(ct.conns) == 0 393 } 394 395 func (ct *connTracker) trackConn(conn net.Conn, state http.ConnState) { 396 ct.mu.Lock() 397 defer ct.mu.Unlock() 398 // we ignore hijacked connections, if we do things with websockets 399 // we'll need custom shutdown handling for them 400 if state == http.StateNew || state == http.StateActive { 401 ct.conns[conn] = struct{}{} 402 } else { 403 delete(ct.conns, conn) 404 } 405 } 406 407 func (d *Daemon) initStandbyHandling() { 408 d.standbyOpinions = standby.New(d.state) 409 d.standbyOpinions.AddOpinion(d.connTracker) 410 d.standbyOpinions.AddOpinion(d.overlord) 411 d.standbyOpinions.AddOpinion(d.overlord.SnapManager()) 412 d.standbyOpinions.AddOpinion(d.overlord.DeviceManager()) 413 d.standbyOpinions.Start() 414 } 415 416 // Start the Daemon 417 func (d *Daemon) Start() error { 418 if d.expectedRebootDidNotHappen { 419 // we need to schedule and wait for a system restart 420 d.tomb.Kill(nil) 421 // avoid systemd killing us again while we wait 422 systemdSdNotify("READY=1") 423 return nil 424 } 425 if d.overlord == nil { 426 panic("internal error: no Overlord") 427 } 428 429 to, reasoning, err := d.overlord.StartupTimeout() 430 if err != nil { 431 return err 432 } 433 if to > 0 { 434 to = to.Round(time.Microsecond) 435 us := to.Nanoseconds() / 1000 436 logger.Noticef("adjusting startup timeout by %v (%s)", to, reasoning) 437 systemdSdNotify(fmt.Sprintf("EXTEND_TIMEOUT_USEC=%d", us)) 438 } 439 // now perform expensive overlord/manages initiliazation 440 if err := d.overlord.StartUp(); err != nil { 441 return err 442 } 443 444 d.connTracker = &connTracker{conns: make(map[net.Conn]struct{})} 445 d.serve = &http.Server{ 446 Handler: logit(d.router), 447 ConnState: d.connTracker.trackConn, 448 } 449 450 // enable standby handling 451 d.initStandbyHandling() 452 453 // the loop runs in its own goroutine 454 d.overlord.Loop() 455 456 d.tomb.Go(func() error { 457 if d.snapListener != nil { 458 d.tomb.Go(func() error { 459 if err := d.serve.Serve(d.snapListener); err != http.ErrServerClosed && d.tomb.Err() == tomb.ErrStillAlive { 460 return err 461 } 462 463 return nil 464 }) 465 } 466 467 if err := d.serve.Serve(d.snapdListener); err != http.ErrServerClosed && d.tomb.Err() == tomb.ErrStillAlive { 468 return err 469 } 470 471 return nil 472 }) 473 474 // notify systemd that we are ready 475 systemdSdNotify("READY=1") 476 return nil 477 } 478 479 // HandleRestart implements overlord.RestartBehavior. 480 func (d *Daemon) HandleRestart(t state.RestartType) { 481 // die when asked to restart (systemd should get us back up!) etc 482 switch t { 483 case state.RestartDaemon: 484 case state.RestartSystem, state.RestartSystemNow: 485 // try to schedule a fallback slow reboot already here 486 // in case we get stuck shutting down 487 if err := reboot(rebootWaitTimeout); err != nil { 488 logger.Noticef("%s", err) 489 } 490 491 d.mu.Lock() 492 defer d.mu.Unlock() 493 // remember we need to restart the system 494 d.restartSystem = t 495 case state.RestartSocket: 496 d.mu.Lock() 497 defer d.mu.Unlock() 498 d.restartSocket = true 499 case state.StopDaemon: 500 logger.Noticef("stopping snapd as requested") 501 default: 502 logger.Noticef("internal error: restart handler called with unknown restart type: %v", t) 503 } 504 d.tomb.Kill(nil) 505 } 506 507 var ( 508 rebootNoticeWait = 3 * time.Second 509 rebootWaitTimeout = 10 * time.Minute 510 rebootRetryWaitTimeout = 5 * time.Minute 511 rebootMaxTentatives = 3 512 ) 513 514 // Stop shuts down the Daemon 515 func (d *Daemon) Stop(sigCh chan<- os.Signal) error { 516 // we need to schedule/wait for a system restart again 517 if d.expectedRebootDidNotHappen { 518 // make the reboot retry immediate 519 immediateReboot := true 520 return d.doReboot(sigCh, immediateReboot, rebootRetryWaitTimeout) 521 } 522 if d.overlord == nil { 523 return fmt.Errorf("internal error: no Overlord") 524 } 525 526 d.tomb.Kill(nil) 527 528 d.mu.Lock() 529 restartSystem := d.restartSystem != state.RestartUnset 530 immediateReboot := d.restartSystem == state.RestartSystemNow 531 restartSocket := d.restartSocket 532 d.mu.Unlock() 533 534 d.snapdListener.Close() 535 d.standbyOpinions.Stop() 536 537 if d.snapListener != nil { 538 // stop running hooks first 539 // and do it more gracefully if we are restarting 540 hookMgr := d.overlord.HookManager() 541 if ok, _ := d.state.Restarting(); ok { 542 logger.Noticef("gracefully waiting for running hooks") 543 hookMgr.GracefullyWaitRunningHooks() 544 logger.Noticef("done waiting for running hooks") 545 } 546 hookMgr.StopHooks() 547 d.snapListener.Close() 548 } 549 550 if restartSystem { 551 // give time to polling clients to notice restart 552 time.Sleep(rebootNoticeWait) 553 } 554 555 // We're using the background context here because the tomb's 556 // context will likely already have been cancelled when we are 557 // called. 558 ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout) 559 d.tomb.Kill(d.serve.Shutdown(ctx)) 560 cancel() 561 562 if !restartSystem { 563 // tell systemd that we are stopping 564 systemdSdNotify("STOPPING=1") 565 566 } 567 568 if restartSocket { 569 // At this point we processed all open requests (and 570 // stopped accepting new requests) - before going into 571 // socket activated mode we need to check if any of 572 // those open requests resulted in something that 573 // prevents us from going into socket activation mode. 574 // 575 // If this is the case we do a "normal" snapd restart 576 // to process the new changes. 577 if !d.standbyOpinions.CanStandby() { 578 d.restartSocket = false 579 } 580 } 581 d.overlord.Stop() 582 583 err := d.tomb.Wait() 584 if err != nil { 585 if err == context.DeadlineExceeded { 586 logger.Noticef("WARNING: cannot gracefully shut down in-flight snapd API activity within: %v", shutdownTimeout) 587 // the process is shutting down anyway, so we may just 588 // as well close the active connections right now 589 d.serve.Close() 590 } else { 591 // do not stop the shutdown even if the tomb errors 592 // because we already scheduled a slow shutdown and 593 // exiting here will just restart snapd (via systemd) 594 // which will lead to confusing results. 595 if restartSystem { 596 logger.Noticef("WARNING: cannot stop daemon: %v", err) 597 } else { 598 return err 599 } 600 } 601 } 602 603 if restartSystem { 604 return d.doReboot(sigCh, immediateReboot, rebootWaitTimeout) 605 } 606 607 if d.restartSocket { 608 return ErrRestartSocket 609 } 610 611 return nil 612 } 613 614 func (d *Daemon) rebootDelay(immediate bool) (time.Duration, error) { 615 d.state.Lock() 616 defer d.state.Unlock() 617 now := time.Now() 618 // see whether a reboot had already been scheduled 619 var rebootAt time.Time 620 err := d.state.Get("daemon-system-restart-at", &rebootAt) 621 if err != nil && err != state.ErrNoState { 622 return 0, err 623 } 624 rebootDelay := 1 * time.Minute 625 if immediate { 626 rebootDelay = 0 627 } 628 if err == nil { 629 rebootDelay = rebootAt.Sub(now) 630 } else { 631 ovr := os.Getenv("SNAPD_REBOOT_DELAY") // for tests 632 if ovr != "" && !immediate { 633 d, err := time.ParseDuration(ovr) 634 if err == nil { 635 rebootDelay = d 636 } 637 } 638 rebootAt = now.Add(rebootDelay) 639 d.state.Set("daemon-system-restart-at", rebootAt) 640 } 641 return rebootDelay, nil 642 } 643 644 func (d *Daemon) doReboot(sigCh chan<- os.Signal, immediate bool, waitTimeout time.Duration) error { 645 rebootDelay, err := d.rebootDelay(immediate) 646 if err != nil { 647 return err 648 } 649 // ask for shutdown and wait for it to happen. 650 // if we exit snapd will be restared by systemd 651 if err := reboot(rebootDelay); err != nil { 652 return err 653 } 654 // wait for reboot to happen 655 logger.Noticef("Waiting for system reboot") 656 if sigCh != nil { 657 signal.Stop(sigCh) 658 if len(sigCh) > 0 { 659 // a signal arrived in between 660 return nil 661 } 662 close(sigCh) 663 } 664 time.Sleep(waitTimeout) 665 return fmt.Errorf("expected reboot did not happen") 666 } 667 668 var shutdownMsg = i18n.G("reboot scheduled to update the system") 669 670 func rebootImpl(rebootDelay time.Duration) error { 671 if rebootDelay < 0 { 672 rebootDelay = 0 673 } 674 mins := int64(rebootDelay / time.Minute) 675 cmd := exec.Command("shutdown", "-r", fmt.Sprintf("+%d", mins), shutdownMsg) 676 if out, err := cmd.CombinedOutput(); err != nil { 677 return osutil.OutputErr(out, err) 678 } 679 return nil 680 } 681 682 var reboot = rebootImpl 683 684 // Dying is a tomb-ish thing 685 func (d *Daemon) Dying() <-chan struct{} { 686 return d.tomb.Dying() 687 } 688 689 func clearReboot(st *state.State) { 690 st.Set("daemon-system-restart-at", nil) 691 st.Set("daemon-system-restart-tentative", nil) 692 } 693 694 // RebootAsExpected implements part of overlord.RestartBehavior. 695 func (d *Daemon) RebootAsExpected(st *state.State) error { 696 clearReboot(st) 697 return nil 698 } 699 700 // RebootDidNotHappen implements part of overlord.RestartBehavior. 701 func (d *Daemon) RebootDidNotHappen(st *state.State) error { 702 var nTentative int 703 err := st.Get("daemon-system-restart-tentative", &nTentative) 704 if err != nil && err != state.ErrNoState { 705 return err 706 } 707 nTentative++ 708 if nTentative > rebootMaxTentatives { 709 // giving up, proceed normally, some in-progress refresh 710 // might get rolled back!! 711 st.ClearReboot() 712 clearReboot(st) 713 logger.Noticef("snapd was restarted while a system restart was expected, snapd retried to schedule and waited again for a system restart %d times and is giving up", rebootMaxTentatives) 714 return nil 715 } 716 st.Set("daemon-system-restart-tentative", nTentative) 717 d.state = st 718 logger.Noticef("snapd was restarted while a system restart was expected, snapd will try to schedule and wait for a system restart again (tenative %d/%d)", nTentative, rebootMaxTentatives) 719 return state.ErrExpectedReboot 720 } 721 722 // New Daemon 723 func New() (*Daemon, error) { 724 d := &Daemon{} 725 ovld, err := overlord.New(d) 726 if err == state.ErrExpectedReboot { 727 // we proceed without overlord until we reach Stop 728 // where we will schedule and wait again for a system restart. 729 // ATM we cannot do that in New because we need to satisfy 730 // systemd notify mechanisms. 731 d.expectedRebootDidNotHappen = true 732 return d, nil 733 } 734 if err != nil { 735 return nil, err 736 } 737 d.overlord = ovld 738 d.state = ovld.State() 739 return d, nil 740 }