github.com/georgethebeatle/containerd@v0.2.5/supervisor/supervisor.go (about) 1 package supervisor 2 3 import ( 4 "encoding/json" 5 "io" 6 "io/ioutil" 7 "os" 8 "path/filepath" 9 "sync" 10 "time" 11 12 "github.com/Sirupsen/logrus" 13 "github.com/docker/containerd/runtime" 14 ) 15 16 const ( 17 defaultBufferSize = 2048 // size of queue in eventloop 18 ) 19 20 // New returns an initialized Process supervisor. 21 func New(stateDir string, runtimeName, shimName string, runtimeArgs []string, timeout time.Duration, retainCount int) (*Supervisor, error) { 22 startTasks := make(chan *startTask, 10) 23 if err := os.MkdirAll(stateDir, 0755); err != nil { 24 return nil, err 25 } 26 machine, err := CollectMachineInformation() 27 if err != nil { 28 return nil, err 29 } 30 monitor, err := NewMonitor() 31 if err != nil { 32 return nil, err 33 } 34 s := &Supervisor{ 35 stateDir: stateDir, 36 containers: make(map[string]*containerInfo), 37 startTasks: startTasks, 38 machine: machine, 39 subscribers: make(map[chan Event]struct{}), 40 tasks: make(chan Task, defaultBufferSize), 41 monitor: monitor, 42 runtime: runtimeName, 43 runtimeArgs: runtimeArgs, 44 shim: shimName, 45 timeout: timeout, 46 } 47 if err := setupEventLog(s, retainCount); err != nil { 48 return nil, err 49 } 50 go s.exitHandler() 51 go s.oomHandler() 52 if err := s.restore(); err != nil { 53 return nil, err 54 } 55 return s, nil 56 } 57 58 type containerInfo struct { 59 container runtime.Container 60 } 61 62 func setupEventLog(s *Supervisor, retainCount int) error { 63 if err := readEventLog(s); err != nil { 64 return err 65 } 66 logrus.WithField("count", len(s.eventLog)).Debug("containerd: read past events") 67 events := s.Events(time.Time{}, false, "") 68 return eventLogger(s, filepath.Join(s.stateDir, "events.log"), events, retainCount) 69 } 70 71 func eventLogger(s *Supervisor, path string, events chan Event, retainCount int) error { 72 f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_APPEND|os.O_TRUNC, 0755) 73 if err != nil { 74 return err 75 } 76 go func() { 77 var ( 78 count = len(s.eventLog) 79 enc = json.NewEncoder(f) 80 ) 81 for e := range events { 82 // if we have a specified retain count make sure the truncate the event 83 // log if it grows past the specified number of events to keep. 84 if retainCount > 0 { 85 if count > retainCount { 86 logrus.Debug("truncating event log") 87 // close the log file 88 if f != nil { 89 f.Close() 90 } 91 slice := retainCount - 1 92 l := len(s.eventLog) 93 if slice >= l { 94 slice = l 95 } 96 s.eventLock.Lock() 97 s.eventLog = s.eventLog[len(s.eventLog)-slice:] 98 s.eventLock.Unlock() 99 if f, err = os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_APPEND|os.O_TRUNC, 0755); err != nil { 100 logrus.WithField("error", err).Error("containerd: open event to journal") 101 continue 102 } 103 enc = json.NewEncoder(f) 104 count = 0 105 for _, le := range s.eventLog { 106 if err := enc.Encode(le); err != nil { 107 logrus.WithField("error", err).Error("containerd: write event to journal") 108 } 109 } 110 } 111 } 112 s.eventLock.Lock() 113 s.eventLog = append(s.eventLog, e) 114 s.eventLock.Unlock() 115 count++ 116 if err := enc.Encode(e); err != nil { 117 logrus.WithField("error", err).Error("containerd: write event to journal") 118 } 119 } 120 }() 121 return nil 122 } 123 124 func readEventLog(s *Supervisor) error { 125 f, err := os.Open(filepath.Join(s.stateDir, "events.log")) 126 if err != nil { 127 if os.IsNotExist(err) { 128 return nil 129 } 130 return err 131 } 132 defer f.Close() 133 dec := json.NewDecoder(f) 134 for { 135 var e eventV1 136 if err := dec.Decode(&e); err != nil { 137 if err == io.EOF { 138 break 139 } 140 return err 141 } 142 143 // We need to take care of -1 Status for backward compatibility 144 ev := e.Event 145 ev.Status = uint32(e.Status) 146 if ev.Status > runtime.UnknownStatus { 147 ev.Status = runtime.UnknownStatus 148 } 149 s.eventLog = append(s.eventLog, ev) 150 } 151 return nil 152 } 153 154 // Supervisor represents a container supervisor 155 type Supervisor struct { 156 // stateDir is the directory on the system to store container runtime state information. 157 stateDir string 158 // name of the OCI compatible runtime used to execute containers 159 runtime string 160 runtimeArgs []string 161 shim string 162 containers map[string]*containerInfo 163 startTasks chan *startTask 164 // we need a lock around the subscribers map only because additions and deletions from 165 // the map are via the API so we cannot really control the concurrency 166 subscriberLock sync.RWMutex 167 subscribers map[chan Event]struct{} 168 machine Machine 169 tasks chan Task 170 monitor *Monitor 171 eventLog []Event 172 eventLock sync.Mutex 173 timeout time.Duration 174 } 175 176 // Stop closes all startTasks and sends a SIGTERM to each container's pid1 then waits for they to 177 // terminate. After it has handled all the SIGCHILD events it will close the signals chan 178 // and exit. Stop is a non-blocking call and will return after the containers have been signaled 179 func (s *Supervisor) Stop() { 180 // Close the startTasks channel so that no new containers get started 181 close(s.startTasks) 182 } 183 184 // Close closes any open files in the supervisor but expects that Stop has been 185 // callsed so that no more containers are started. 186 func (s *Supervisor) Close() error { 187 return nil 188 } 189 190 // Event represents a container event 191 type Event struct { 192 ID string `json:"id"` 193 Type string `json:"type"` 194 Timestamp time.Time `json:"timestamp"` 195 PID string `json:"pid,omitempty"` 196 Status uint32 `json:"status,omitempty"` 197 } 198 199 type eventV1 struct { 200 Event 201 Status int `json:"status,omitempty"` 202 } 203 204 // Events returns an event channel that external consumers can use to receive updates 205 // on container events 206 func (s *Supervisor) Events(from time.Time, storedOnly bool, id string) chan Event { 207 c := make(chan Event, defaultBufferSize) 208 if storedOnly { 209 defer s.Unsubscribe(c) 210 } 211 s.subscriberLock.Lock() 212 defer s.subscriberLock.Unlock() 213 if !from.IsZero() { 214 // replay old event 215 s.eventLock.Lock() 216 past := s.eventLog[:] 217 s.eventLock.Unlock() 218 for _, e := range past { 219 if e.Timestamp.After(from) { 220 if id == "" || e.ID == id { 221 c <- e 222 } 223 } 224 } 225 } 226 if storedOnly { 227 close(c) 228 } else { 229 EventSubscriberCounter.Inc(1) 230 s.subscribers[c] = struct{}{} 231 } 232 return c 233 } 234 235 // Unsubscribe removes the provided channel from receiving any more events 236 func (s *Supervisor) Unsubscribe(sub chan Event) { 237 s.subscriberLock.Lock() 238 defer s.subscriberLock.Unlock() 239 if _, ok := s.subscribers[sub]; ok { 240 delete(s.subscribers, sub) 241 close(sub) 242 EventSubscriberCounter.Dec(1) 243 } 244 } 245 246 // notifySubscribers will send the provided event to the external subscribers 247 // of the events channel 248 func (s *Supervisor) notifySubscribers(e Event) { 249 s.subscriberLock.RLock() 250 defer s.subscriberLock.RUnlock() 251 for sub := range s.subscribers { 252 // do a non-blocking send for the channel 253 select { 254 case sub <- e: 255 default: 256 logrus.WithField("event", e.Type).Warn("containerd: event not sent to subscriber") 257 } 258 } 259 } 260 261 // Start is a non-blocking call that runs the supervisor for monitoring contianer processes and 262 // executing new containers. 263 // 264 // This event loop is the only thing that is allowed to modify state of containers and processes 265 // therefore it is save to do operations in the handlers that modify state of the system or 266 // state of the Supervisor 267 func (s *Supervisor) Start() error { 268 logrus.WithFields(logrus.Fields{ 269 "stateDir": s.stateDir, 270 "runtime": s.runtime, 271 "runtimeArgs": s.runtimeArgs, 272 "memory": s.machine.Memory, 273 "cpus": s.machine.Cpus, 274 }).Debug("containerd: supervisor running") 275 go func() { 276 for i := range s.tasks { 277 s.handleTask(i) 278 } 279 }() 280 return nil 281 } 282 283 // Machine returns the machine information for which the 284 // supervisor is executing on. 285 func (s *Supervisor) Machine() Machine { 286 return s.machine 287 } 288 289 // SendTask sends the provided event the the supervisors main event loop 290 func (s *Supervisor) SendTask(evt Task) { 291 TasksCounter.Inc(1) 292 s.tasks <- evt 293 } 294 295 func (s *Supervisor) exitHandler() { 296 for p := range s.monitor.Exits() { 297 e := &ExitTask{ 298 Process: p, 299 } 300 s.SendTask(e) 301 } 302 } 303 304 func (s *Supervisor) oomHandler() { 305 for id := range s.monitor.OOMs() { 306 e := &OOMTask{ 307 ID: id, 308 } 309 s.SendTask(e) 310 } 311 } 312 313 func (s *Supervisor) monitorProcess(p runtime.Process) error { 314 return s.monitor.Monitor(p) 315 } 316 317 func (s *Supervisor) restore() error { 318 dirs, err := ioutil.ReadDir(s.stateDir) 319 if err != nil { 320 return err 321 } 322 for _, d := range dirs { 323 if !d.IsDir() { 324 continue 325 } 326 id := d.Name() 327 container, err := runtime.Load(s.stateDir, id, s.shim, s.timeout) 328 if err != nil { 329 return err 330 } 331 processes, err := container.Processes() 332 if err != nil { 333 return err 334 } 335 336 ContainersCounter.Inc(1) 337 s.containers[id] = &containerInfo{ 338 container: container, 339 } 340 if err := s.monitor.MonitorOOM(container); err != nil && err != runtime.ErrContainerExited { 341 logrus.WithField("error", err).Error("containerd: notify OOM events") 342 } 343 logrus.WithField("id", id).Debug("containerd: container restored") 344 var exitedProcesses []runtime.Process 345 for _, p := range processes { 346 if p.State() == runtime.Running { 347 if err := s.monitorProcess(p); err != nil { 348 return err 349 } 350 } else { 351 exitedProcesses = append(exitedProcesses, p) 352 } 353 } 354 if len(exitedProcesses) > 0 { 355 // sort processes so that init is fired last because that is how the kernel sends the 356 // exit events 357 sortProcesses(exitedProcesses) 358 for _, p := range exitedProcesses { 359 e := &ExitTask{ 360 Process: p, 361 } 362 s.SendTask(e) 363 } 364 } 365 } 366 return nil 367 } 368 369 func (s *Supervisor) handleTask(i Task) { 370 var err error 371 switch t := i.(type) { 372 case *AddProcessTask: 373 err = s.addProcess(t) 374 case *CreateCheckpointTask: 375 err = s.createCheckpoint(t) 376 case *DeleteCheckpointTask: 377 err = s.deleteCheckpoint(t) 378 case *StartTask: 379 err = s.start(t) 380 case *DeleteTask: 381 err = s.delete(t) 382 case *ExitTask: 383 err = s.exit(t) 384 case *GetContainersTask: 385 err = s.getContainers(t) 386 case *SignalTask: 387 err = s.signal(t) 388 case *StatsTask: 389 err = s.stats(t) 390 case *UpdateTask: 391 err = s.updateContainer(t) 392 case *UpdateProcessTask: 393 err = s.updateProcess(t) 394 case *OOMTask: 395 err = s.oom(t) 396 default: 397 err = ErrUnknownTask 398 } 399 if err != errDeferredResponse { 400 i.ErrorCh() <- err 401 close(i.ErrorCh()) 402 } 403 }