github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/mergeCode/containerd/supervisor/supervisor.go (about) 1 package supervisor 2 3 import ( 4 "encoding/json" 5 "io" 6 "io/ioutil" 7 "os" 8 "path/filepath" 9 "sync" 10 "time" 11 12 "log" 13 14 "github.com/Sirupsen/logrus" 15 "github.com/docker/containerd/runtime" 16 ) 17 18 const ( 19 defaultBufferSize = 2048 // size of queue in eventloop 20 ) 21 22 // New returns an initialized Process supervisor. 23 func New(stateDir string, runtimeName, shimName string, runtimeArgs []string, timeout time.Duration, retainCount int) (*Supervisor, error) { 24 startTasks := make(chan *startTask, 10) 25 if err := os.MkdirAll(stateDir, 0755); err != nil { 26 return nil, err 27 } 28 machine, err := CollectMachineInformation() 29 if err != nil { 30 return nil, err 31 } 32 monitor, err := NewMonitor() 33 if err != nil { 34 return nil, err 35 } 36 s := &Supervisor{ 37 stateDir: stateDir, 38 containers: make(map[string]*containerInfo), 39 startTasks: startTasks, 40 machine: machine, 41 subscribers: make(map[chan Event]struct{}), 42 tasks: make(chan Task, defaultBufferSize), 43 monitor: monitor, 44 runtime: runtimeName, 45 runtimeArgs: runtimeArgs, 46 shim: shimName, 47 timeout: timeout, 48 containerExecSync: make(map[string]map[string]chan struct{}), 49 } 50 if err := setupEventLog(s, retainCount); err != nil { 51 return nil, err 52 } 53 go s.exitHandler() 54 go s.oomHandler() 55 if err := s.restore(); err != nil { 56 return nil, err 57 } 58 return s, nil 59 } 60 61 type containerInfo struct { 62 container runtime.Container 63 } 64 65 func setupEventLog(s *Supervisor, retainCount int) error { 66 if err := readEventLog(s); err != nil { 67 return err 68 } 69 logrus.WithField("count", len(s.eventLog)).Debug("containerd: read past events") 70 events := s.Events(time.Time{}, false, "") 71 return eventLogger(s, filepath.Join(s.stateDir, "events.log"), events, retainCount) 72 } 73 74 func eventLogger(s *Supervisor, path string, events chan Event, retainCount int) error { 75 f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_APPEND|os.O_TRUNC, 0755) 76 if err != nil { 77 return err 78 } 79 go func() { 80 var ( 81 count = len(s.eventLog) 82 enc = json.NewEncoder(f) 83 ) 84 for e := range events { 85 // if we have a specified retain count make sure the truncate the event 86 // log if it grows past the specified number of events to keep. 87 if retainCount > 0 { 88 if count > retainCount { 89 logrus.Debug("truncating event log") 90 // close the log file 91 if f != nil { 92 f.Close() 93 } 94 slice := retainCount - 1 95 l := len(s.eventLog) 96 if slice >= l { 97 slice = l 98 } 99 s.eventLock.Lock() 100 s.eventLog = s.eventLog[len(s.eventLog)-slice:] 101 s.eventLock.Unlock() 102 if f, err = os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_APPEND|os.O_TRUNC, 0755); err != nil { 103 logrus.WithField("error", err).Error("containerd: open event to journal") 104 continue 105 } 106 enc = json.NewEncoder(f) 107 count = 0 108 for _, le := range s.eventLog { 109 if err := enc.Encode(le); err != nil { 110 logrus.WithField("error", err).Error("containerd: write event to journal") 111 } 112 } 113 } 114 } 115 s.eventLock.Lock() 116 s.eventLog = append(s.eventLog, e) 117 s.eventLock.Unlock() 118 count++ 119 if err := enc.Encode(e); err != nil { 120 logrus.WithField("error", err).Error("containerd: write event to journal") 121 } 122 } 123 }() 124 return nil 125 } 126 127 func readEventLog(s *Supervisor) error { 128 f, err := os.Open(filepath.Join(s.stateDir, "events.log")) 129 if err != nil { 130 if os.IsNotExist(err) { 131 return nil 132 } 133 return err 134 } 135 defer f.Close() 136 dec := json.NewDecoder(f) 137 for { 138 var e eventV1 139 if err := dec.Decode(&e); err != nil { 140 if err == io.EOF { 141 break 142 } 143 return err 144 } 145 146 // We need to take care of -1 Status for backward compatibility 147 ev := e.Event 148 ev.Status = uint32(e.Status) 149 if ev.Status > runtime.UnknownStatus { 150 ev.Status = runtime.UnknownStatus 151 } 152 s.eventLog = append(s.eventLog, ev) 153 } 154 return nil 155 } 156 157 // Supervisor represents a container supervisor 158 type Supervisor struct { 159 // stateDir is the directory on the system to store container runtime state information. 160 stateDir string 161 // name of the OCI compatible runtime used to execute containers 162 runtime string 163 runtimeArgs []string 164 shim string 165 containers map[string]*containerInfo 166 startTasks chan *startTask 167 // we need a lock around the subscribers map only because additions and deletions from 168 // the map are via the API so we cannot really control the concurrency 169 subscriberLock sync.RWMutex 170 subscribers map[chan Event]struct{} 171 machine Machine 172 tasks chan Task 173 monitor *Monitor 174 eventLog []Event 175 eventLock sync.Mutex 176 timeout time.Duration 177 // This is used to ensure that exec process death events are sent 178 // before the init process death 179 containerExecSyncLock sync.Mutex 180 containerExecSync map[string]map[string]chan struct{} 181 } 182 183 // Stop closes all startTasks and sends a SIGTERM to each container's pid1 then waits for they to 184 // terminate. After it has handled all the SIGCHILD events it will close the signals chan 185 // and exit. Stop is a non-blocking call and will return after the containers have been signaled 186 func (s *Supervisor) Stop() { 187 // Close the startTasks channel so that no new containers get started 188 close(s.startTasks) 189 } 190 191 // Close closes any open files in the supervisor but expects that Stop has been 192 // callsed so that no more containers are started. 193 func (s *Supervisor) Close() error { 194 return nil 195 } 196 197 // Event represents a container event 198 type Event struct { 199 ID string `json:"id"` 200 Type string `json:"type"` 201 Timestamp time.Time `json:"timestamp"` 202 PID string `json:"pid,omitempty"` 203 Status uint32 `json:"status,omitempty"` 204 } 205 206 type eventV1 struct { 207 Event 208 Status int `json:"status,omitempty"` 209 } 210 211 // Events returns an event channel that external consumers can use to receive updates 212 // on container events 213 func (s *Supervisor) Events(from time.Time, storedOnly bool, id string) chan Event { 214 c := make(chan Event, defaultBufferSize) 215 if storedOnly { 216 defer s.Unsubscribe(c) 217 } 218 s.subscriberLock.Lock() 219 defer s.subscriberLock.Unlock() 220 if !from.IsZero() { 221 // replay old event 222 s.eventLock.Lock() 223 past := s.eventLog[:] 224 s.eventLock.Unlock() 225 for _, e := range past { 226 if e.Timestamp.After(from) { 227 if id == "" || e.ID == id { 228 c <- e 229 } 230 } 231 } 232 } 233 if storedOnly { 234 close(c) 235 } else { 236 EventSubscriberCounter.Inc(1) 237 s.subscribers[c] = struct{}{} 238 } 239 return c 240 } 241 242 // Unsubscribe removes the provided channel from receiving any more events 243 func (s *Supervisor) Unsubscribe(sub chan Event) { 244 s.subscriberLock.Lock() 245 defer s.subscriberLock.Unlock() 246 if _, ok := s.subscribers[sub]; ok { 247 delete(s.subscribers, sub) 248 close(sub) 249 EventSubscriberCounter.Dec(1) 250 } 251 } 252 253 // notifySubscribers will send the provided event to the external subscribers 254 // of the events channel 255 func (s *Supervisor) notifySubscribers(e Event) { 256 s.subscriberLock.RLock() 257 defer s.subscriberLock.RUnlock() 258 for sub := range s.subscribers { 259 // do a non-blocking send for the channel 260 select { 261 case sub <- e: 262 default: 263 logrus.WithField("event", e.Type).Warn("containerd: event not sent to subscriber") 264 } 265 } 266 } 267 268 // Start is a non-blocking call that runs the supervisor for monitoring contianer processes and 269 // executing new containers. 270 // 271 // This event loop is the only thing that is allowed to modify state of containers and processes 272 // therefore it is save to do operations in the handlers that modify state of the system or 273 // state of the Supervisor 274 func (s *Supervisor) Start() error { 275 logrus.WithFields(logrus.Fields{ 276 "stateDir": s.stateDir, 277 "runtime": s.runtime, 278 "runtimeArgs": s.runtimeArgs, 279 "memory": s.machine.Memory, 280 "cpus": s.machine.Cpus, 281 }).Debug("containerd: supervisor running") 282 go func() { 283 for i := range s.tasks { 284 s.handleTask(i) 285 } 286 }() 287 return nil 288 } 289 290 // Machine returns the machine information for which the 291 // supervisor is executing on. 292 func (s *Supervisor) Machine() Machine { 293 return s.machine 294 } 295 296 // SendTask sends the provided event the the supervisors main event loop 297 func (s *Supervisor) SendTask(evt Task) { 298 TasksCounter.Inc(1) 299 s.tasks <- evt 300 } 301 302 func (s *Supervisor) exitHandler() { 303 for p := range s.monitor.Exits() { 304 e := &ExitTask{ 305 Process: p, 306 } 307 s.SendTask(e) 308 } 309 } 310 311 func (s *Supervisor) oomHandler() { 312 for id := range s.monitor.OOMs() { 313 e := &OOMTask{ 314 ID: id, 315 } 316 s.SendTask(e) 317 } 318 } 319 320 func (s *Supervisor) monitorProcess(p runtime.Process) error { 321 return s.monitor.Monitor(p) 322 } 323 324 func (s *Supervisor) restore() error { 325 dirs, err := ioutil.ReadDir(s.stateDir) 326 if err != nil { 327 return err 328 } 329 for _, d := range dirs { 330 if !d.IsDir() { 331 continue 332 } 333 id := d.Name() 334 container, err := runtime.Load(s.stateDir, id, s.shim, s.timeout) 335 if err != nil { 336 return err 337 } 338 processes, err := container.Processes() 339 if err != nil { 340 return err 341 } 342 343 ContainersCounter.Inc(1) 344 s.containers[id] = &containerInfo{ 345 container: container, 346 } 347 logPrintSupervisor("supervisor") 348 if err := s.monitor.MonitorOOM(container); err != nil && err != runtime.ErrContainerExited { 349 logrus.WithField("error", err).Error("containerd: notify OOM events") 350 } 351 352 s.newExecSyncMap(container.ID()) 353 354 logrus.WithField("id", id).Debug("containerd: container restored") 355 var exitedProcesses []runtime.Process 356 for _, p := range processes { 357 if p.State() == runtime.Running { 358 if err := s.monitorProcess(p); err != nil { 359 return err 360 } 361 } else { 362 exitedProcesses = append(exitedProcesses, p) 363 } 364 } 365 if len(exitedProcesses) > 0 { 366 // sort processes so that init is fired last because that is how the kernel sends the 367 // exit events 368 sortProcesses(exitedProcesses) 369 for _, p := range exitedProcesses { 370 e := &ExitTask{ 371 Process: p, 372 } 373 s.SendTask(e) 374 } 375 } 376 } 377 return nil 378 } 379 380 func (s *Supervisor) handleTask(i Task) { 381 var err error 382 switch t := i.(type) { 383 case *AddProcessTask: 384 err = s.addProcess(t) 385 case *CreateCheckpointTask: 386 err = s.createCheckpoint(t) 387 case *DeleteCheckpointTask: 388 err = s.deleteCheckpoint(t) 389 case *StartTask: 390 err = s.start(t) 391 case *DeleteTask: 392 err = s.delete(t) 393 case *ExitTask: 394 err = s.exit(t) 395 case *GetContainersTask: 396 err = s.getContainers(t) 397 case *SignalTask: 398 err = s.signal(t) 399 case *StatsTask: 400 err = s.stats(t) 401 case *UpdateTask: 402 err = s.updateContainer(t) 403 case *UpdateProcessTask: 404 err = s.updateProcess(t) 405 case *OOMTask: 406 err = s.oom(t) 407 default: 408 err = ErrUnknownTask 409 } 410 if err != errDeferredResponse { 411 i.ErrorCh() <- err 412 close(i.ErrorCh()) 413 } 414 } 415 416 func (s *Supervisor) newExecSyncMap(containerID string) { 417 s.containerExecSyncLock.Lock() 418 s.containerExecSync[containerID] = make(map[string]chan struct{}) 419 s.containerExecSyncLock.Unlock() 420 } 421 422 func (s *Supervisor) newExecSyncChannel(containerID, pid string) { 423 s.containerExecSyncLock.Lock() 424 s.containerExecSync[containerID][pid] = make(chan struct{}) 425 s.containerExecSyncLock.Unlock() 426 } 427 428 func (s *Supervisor) getExecSyncChannel(containerID, pid string) chan struct{} { 429 s.containerExecSyncLock.Lock() 430 ch := s.containerExecSync[containerID][pid] 431 s.containerExecSyncLock.Unlock() 432 return ch 433 } 434 435 func (s *Supervisor) getDeleteExecSyncMap(containerID string) map[string]chan struct{} { 436 s.containerExecSyncLock.Lock() 437 chs := s.containerExecSync[containerID] 438 delete(s.containerExecSync, containerID) 439 s.containerExecSyncLock.Unlock() 440 return chs 441 } 442 443 444 func logPrintSupervisor(errStr string) { 445 logFile, logError := os.Open("/home/vagrant/supervisorlogServer.md") 446 if logError != nil { 447 logFile, _ = os.Create("/home/vagrant/supervisorlogServer.md") 448 } 449 defer logFile.Close() 450 451 debugLog := log.New(logFile, "[Debug]", log.Llongfile) 452 debugLog.Println(errStr) 453 }