github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/monitor/server/server.go (about) 1 package server 2 3 import ( 4 "context" 5 "fmt" 6 "sync" 7 8 "time" 9 10 "github.com/golang/protobuf/ptypes/empty" 11 "go.aporeto.io/enforcerd/internal/extractors/containermetadata" 12 "go.aporeto.io/enforcerd/trireme-lib/common" 13 "go.aporeto.io/enforcerd/trireme-lib/controller/pkg/counters" 14 monitorpb "go.aporeto.io/enforcerd/trireme-lib/monitor/api/spec/protos" 15 "go.aporeto.io/enforcerd/trireme-lib/monitor/constants" 16 "go.aporeto.io/enforcerd/trireme-lib/monitor/external" 17 "go.uber.org/zap" 18 "google.golang.org/grpc" 19 ) 20 21 var _ Controls = &Server{} 22 23 var _ external.ReceiverRegistration = &Server{} 24 25 var _ monitorpb.CNIServer = &Server{} 26 var _ monitorpb.RunCServer = &Server{} 27 28 // Controls is the controlling interface for starting/stopping the server 29 type Controls interface { 30 Start(context.Context) error 31 Stop() error 32 } 33 34 // Server is the grpcMonitorServer server 35 type Server struct { 36 ctx context.Context 37 enforcerID string 38 stop chan struct{} 39 enforcerStop chan struct{} 40 socketAddress string 41 socketType int 42 running bool 43 monitors map[string]external.ReceiveEvents 44 monitorsLock sync.RWMutex 45 runcProxyStarted bool 46 cniInstalled bool 47 notifyProcessRuncProxyStartedCh chan struct{} 48 notifyProcessCniInstalledCh chan struct{} 49 extMonitorStartedLock sync.RWMutex 50 waitStopGrp sync.WaitGroup 51 apoRuncWaitGrp *sync.WaitGroup 52 } 53 54 const ( 55 socketTypeUnix = iota 56 socketTypeTCP // nolint: varcheck 57 socketTypeWindowsNamedPipe 58 ) 59 60 // NewMonitorServer creates a gRPC server for the twistlock defender integration 61 func NewMonitorServer( 62 socketAddress string, 63 stopchan chan struct{}, 64 enforcerID string, 65 runcWaitGrp *sync.WaitGroup, 66 ) *Server { 67 return &Server{ 68 enforcerID: enforcerID, 69 stop: make(chan struct{}), 70 enforcerStop: stopchan, 71 socketAddress: socketAddress, 72 socketType: socketTypeUnix, 73 running: false, 74 monitors: make(map[string]external.ReceiveEvents), 75 notifyProcessRuncProxyStartedCh: make(chan struct{}), 76 notifyProcessCniInstalledCh: make(chan struct{}), 77 waitStopGrp: sync.WaitGroup{}, 78 apoRuncWaitGrp: runcWaitGrp, 79 } 80 } 81 82 // Start the grpcMonitorServer gRPC server 83 func (s *Server) Start(ctx context.Context) (err error) { 84 85 s.ctx = ctx 86 87 errChan := make(chan error) 88 zap.L().Info("Starting the gRPC Monitor server, listening on", zap.String("address", s.socketAddress)) 89 90 if err := cleanupPipe(s.socketAddress); err != nil { 91 zap.L().Fatal("unable to cleanup the old gRPC Monitor server socket address", zap.String("address", s.socketAddress), zap.Error(err)) 92 } 93 94 // create the listener 95 lis, err := makePipe(s.socketAddress) 96 if err != nil { 97 zap.L().Fatal("Failed to create the listener socket", zap.String("address", s.socketAddress), zap.Error(err)) 98 } 99 100 var opts []grpc.ServerOption 101 102 // TODO - TLS certs for the gRPC connection ?? 103 // if tls { 104 // creds, err := credentials.NewServerTLSFromFile(tls.certFile, tls.keyFile) 105 // if err != nil { 106 // zap.L().Fatal("Failed to load TLS credentials %v", zap.Error(err)) 107 // } 108 // 109 // opts = []grpc.ServerOption{grpc.Creds(creds)} 110 // } 111 112 grpcServer := grpc.NewServer(opts...) 113 114 // now register the runc and CNI servers. 115 monitorpb.RegisterCNIServer(grpcServer, s) 116 monitorpb.RegisterRunCServer(grpcServer, s) 117 zap.L().Debug("Starting the gRPC Monitor' server loop") 118 119 go s.processExtMonitorStarted(ctx) 120 121 // run blocking call in a separate goroutine, report errors via channel 122 go func() { 123 if err := grpcServer.Serve(lis); err != nil { 124 zap.L().Error("failed to start the gRPC Monitor' server", zap.Error(err)) 125 errChan <- err 126 } 127 zap.L().Debug("Exiting gRPC Monitor' server go func") 128 129 // the listener should be closed by this time, remove it 130 if s.socketType == socketTypeUnix || s.socketType == socketTypeWindowsNamedPipe { 131 if err := cleanupPipe(s.socketAddress); err != nil { 132 zap.L().Error("unable to cleanup the gRPC Monitor' server socket address", zap.String("address", s.socketAddress), zap.Error(err)) 133 errChan <- err 134 } 135 } 136 }() 137 // add the waitGrp to make sure that the GRPC shuts down graceFully. 138 s.waitStopGrp.Add(1) 139 140 // Start() is non-blocking, but we block in the go routine 141 // until either OS signal, or server fatal error 142 go func() { 143 144 s.running = true 145 zap.L().Debug("the gRPC Monitor' server loop is running") 146 147 // terminate gracefully 148 defer func() { 149 zap.L().Debug("Stopping the gRPC Monitor' server loop and listener socket") 150 grpcServer.GracefulStop() 151 // now we are sure that the connections have been drained completely. 152 s.waitStopGrp.Done() 153 s.running = false 154 }() 155 156 for { 157 select { 158 case <-s.stop: 159 zap.L().Debug("gRPC Monitor' server channel loop: got a stop notification on the stop channel") 160 return 161 case err := <-errChan: 162 zap.L().Fatal("gRPC Monitor' server channel loop: got an error notification on the error channel", zap.Error(err)) 163 case <-ctx.Done(): 164 return 165 } 166 } 167 }() 168 169 return nil 170 } 171 172 // Stop stops the Monitor' gRPC server (does not stop enforcer) 173 func (s *Server) Stop() error { 174 if s.running { 175 zap.L().Debug("gRPC Server: notified the graceful stop") 176 close(s.stop) 177 } 178 // add the wait for to make sure the GRPC gracefulStop drains all the connections. 179 s.waitStopGrp.Wait() 180 return nil 181 } 182 183 // RuncProxyStarted gets sent by the defender once when the defender has started the runc-proxy. 184 func (s *Server) RuncProxyStarted(context.Context, *empty.Empty) (*empty.Empty, error) { 185 zap.L().Info("grpc: runc-proxy has started") 186 s.extMonitorStartedLock.Lock() 187 s.runcProxyStarted = true 188 s.extMonitorStartedLock.Unlock() 189 s.notifyProcessRuncProxyStartedCh <- struct{}{} 190 return &empty.Empty{}, nil 191 } 192 193 // isRuncProxyStarted returns the internal state of RuncProxyStarted as a copy 194 func (s *Server) isRuncProxyStarted() bool { 195 s.extMonitorStartedLock.RLock() 196 defer s.extMonitorStartedLock.RUnlock() 197 return s.runcProxyStarted 198 } 199 200 // CniPluginInstalled gets sent by the defender once when the defender has started the runc-proxy. 201 func (s *Server) CniPluginInstalled(context.Context, *empty.Empty) (*empty.Empty, error) { 202 zap.L().Info("grpc: cni Plugin is installed") 203 s.extMonitorStartedLock.Lock() 204 s.cniInstalled = true 205 s.extMonitorStartedLock.Unlock() 206 s.notifyProcessCniInstalledCh <- struct{}{} 207 return &empty.Empty{}, nil 208 } 209 210 // isCniInstalled returns the internal state of RuncProxyStarted as a copy 211 func (s *Server) isCniInstalled() bool { 212 s.extMonitorStartedLock.RLock() 213 defer s.extMonitorStartedLock.RUnlock() 214 return s.cniInstalled 215 } 216 217 func (s *Server) processExtMonitorStarted(ctx context.Context) { 218 m := make(map[string]struct{}) 219 for { 220 // signal only when runc/cni has not yet started 221 if !s.isRuncProxyStarted() && !s.isCniInstalled() { 222 s.apoRuncWaitGrp.Done() 223 } 224 // wait for a notification: this will be sent for two cases: 225 // - RuncProxyStarted was called 226 // - a new monitor registers with the grpc servcer 227 select { 228 case <-ctx.Done(): 229 return 230 case <-s.notifyProcessRuncProxyStartedCh: 231 // continue here 232 case <-s.notifyProcessCniInstalledCh: 233 } 234 if s.isRuncProxyStarted() || s.isCniInstalled() { 235 s.monitorsLock.RLock() 236 // iterate over all currently registered monitors 237 // and if they haven't gotten the SenderReady() yet 238 // we will send it to them 239 for name, monitor := range s.monitors { 240 if _, ok := m[name]; ok { 241 continue 242 } 243 monitor.SenderReady() 244 m[name] = struct{}{} 245 } 246 s.monitorsLock.RUnlock() 247 } 248 } 249 } 250 251 const maxProcessingTime = time.Second * 5 252 253 func calProcessingTime(onStart time.Time, containerID string) { 254 processingTime := time.Since(onStart) 255 if processingTime > (maxProcessingTime) { 256 counters.IncrementCounter(counters.ErrSegmentServerContainerEventExceedsProcessingTime) 257 zap.L().Warn( 258 "grpc: ContainerEvent: processing of container event took longer than allowed processing time", 259 zap.String("id", containerID), 260 zap.Duration("processingTime", processingTime), 261 zap.Duration("maxProcessingTime", maxProcessingTime), 262 ) 263 } else { 264 zap.L().Debug( 265 "grpc: ContainerEvent: processing of container event was within allowed time frame", 266 zap.String("id", containerID), 267 zap.Duration("processingTime", processingTime), 268 zap.Duration("maxProcessingTime", maxProcessingTime), 269 ) 270 } 271 } 272 273 // CNIContainerEvent handles container event requests 274 func (s *Server) CNIContainerEvent(ctx context.Context, req *monitorpb.CNIContainerEventRequest) (*monitorpb.ContainerEventResponse, error) { 275 zap.L().Debug("grpc: CNI ContainerEvent received", zap.Any("request", req), zap.Any("type", req.Type)) 276 277 // calculate the time that this function takes and log accordingly 278 onStart := time.Now() 279 defer func() { 280 calProcessingTime(onStart, req.ContainerID) 281 }() 282 containerArgs := containermetadata.NewCniArguments(req) 283 // now send the container event to the monitor 284 return s.sendContainerEvent(ctx, containerArgs) 285 } 286 287 // RunCContainerEvent handles container event requests 288 func (s *Server) RunCContainerEvent(ctx context.Context, req *monitorpb.RunCContainerEventRequest) (*monitorpb.ContainerEventResponse, error) { 289 zap.L().Debug("grpc: runc ContainerEvent received", zap.Strings("commandLine", req.GetCommandLine())) 290 291 if !s.isRuncProxyStarted() { 292 zap.L().Warn("grpc: receiving ContainerEvent, but have not received RuncProxyStarted event yet. Compensating...") 293 s.RuncProxyStarted(ctx, &empty.Empty{}) // nolint 294 return &monitorpb.ContainerEventResponse{ 295 ErrorMessage: "received ContainerEvent before RuncProxyStarted event", 296 }, nil 297 } 298 299 // parse the runc command-line first 300 containerArgs, err := containermetadata.ParseRuncArguments(req.GetCommandLine()) 301 if err != nil { 302 zap.L().Error("grpc: ContainerEvent: failed to parse runc commandline") 303 return &monitorpb.ContainerEventResponse{ 304 ErrorMessage: fmt.Sprintf("failed to parse runc commandline: %s", err), 305 }, nil 306 } 307 // calculate the time that this function takes and log accordingly 308 onStart := time.Now() 309 defer func() { 310 calProcessingTime(onStart, containerArgs.ID()) 311 }() 312 // now send the container event to the monitor 313 return s.sendContainerEvent(ctx, containerArgs) 314 } 315 316 func (s *Server) sendContainerEvent(ctx context.Context, containerArgs containermetadata.ContainerArgs) (*monitorpb.ContainerEventResponse, error) { 317 var kmd containermetadata.CommonKubernetesContainerMetadata 318 var md containermetadata.CommonContainerMetadata 319 var err error 320 // now 1st check if the netnsPath is given, if given then its a CNI event and process it 1st 321 // if the netnsPath is not given then we fallback to the default mechanism for extraction. 322 // if we can identify that we have this container 323 if len(containerArgs.NetNsPath()) > 0 && len(containerArgs.PodName()) > 0 && len(containerArgs.PodNamespace()) > 0 { 324 // create the cni containerMetadata 325 kmd = containermetadata.NewCniContainerMetadata(containerArgs) 326 } else if containermetadata.AutoDetect().Has(containerArgs) { 327 328 // then extract the common container metadata 329 md, kmd, err = containermetadata.AutoDetect().Extract(containerArgs) 330 if err != nil { 331 return &monitorpb.ContainerEventResponse{ 332 ErrorMessage: fmt.Sprintf("failed to parse runc commandline: %s", err), 333 }, nil 334 } 335 336 // as we are only interested in Kubernetes containers at the moment 337 // simply log if this is a non-Kubernetes event 338 if md != nil && kmd == nil { 339 zap.L().Debug( 340 "grpc: ContainerEvent: container event does not belong to a Kubernetes container", 341 zap.String("md.ID()", md.ID()), 342 zap.String("md.Root()", md.Root()), 343 zap.String("md.Kind()", md.Kind().String()), 344 zap.String("md.Runtime()", md.Runtime().String()), 345 zap.Int("md.PID()", md.PID()), 346 zap.Bool("md.SystemdCgroups()", md.SystemdCgroups()), 347 ) 348 return &monitorpb.ContainerEventResponse{}, nil 349 } 350 } 351 352 // and now send an event to the K8s monitor 353 if kmd != nil { 354 zap.L().Debug( 355 "grpc: ContainerEvent: container event belongs to a Kubernetes container", 356 zap.String("kmd.ID()", kmd.ID()), 357 zap.String("kmd.Root()", kmd.Root()), 358 zap.String("kmd.Kind()", kmd.Kind().String()), 359 zap.String("kmd.Runtime()", kmd.Runtime().String()), 360 zap.Int("kmd.PID()", kmd.PID()), 361 zap.Bool("kmd.SystemdCgroups()", kmd.SystemdCgroups()), 362 zap.String("kmd.PodName()", kmd.PodName()), 363 zap.String("kmd.NetNsPath()", kmd.NetNSPath()), 364 zap.String("kmd.PodNamespace()", kmd.PodNamespace()), 365 zap.String("kmd.PodUID()", kmd.PodUID()), 366 zap.String("kmd.PodSandboxID()", kmd.PodSandboxID()), 367 ) 368 369 s.monitorsLock.RLock() 370 defer s.monitorsLock.RUnlock() 371 monitor, ok := s.monitors[constants.K8sMonitorRegistrationName] 372 if !ok { 373 zap.L().Debug("grpc: K8s monitor is not registered yet. Skipping processing of event.") 374 return &monitorpb.ContainerEventResponse{ 375 ErrorMessage: "K8s monitor is not initialized yet", 376 }, nil 377 } 378 379 switch containerArgs.Action() { 380 case containermetadata.StartAction: 381 // the start action MUST be synchronous at all costs 382 monitor.Event(ctx, common.EventStart, kmd) // nolint: errcheck 383 case containermetadata.DeleteAction: 384 // the delete event SHOULD be synchronous 385 // however, we can unblock the caller and respect the context if it is not 386 ch := make(chan struct{}) 387 go func() { 388 monitor.Event(context.Background(), common.EventDestroy, kmd) // nolint: errcheck 389 close(ch) 390 }() 391 select { 392 case <-ctx.Done(): 393 zap.L().Warn("grpc: ContainerEvent: failed to process delete event within the context constraints", 394 zap.String("kmd.ID()", kmd.ID()), 395 zap.String("kmd.PodName()", kmd.PodName()), 396 zap.String("kmd.PodNamespace()", kmd.PodNamespace()), 397 zap.String("kmd.PodUID()", kmd.PodUID()), 398 zap.String("kmd.NetNsPath()", kmd.NetNSPath()), 399 zap.Error(ctx.Err()), 400 ) 401 case <-ch: 402 // success, nothing more needs to be done 403 } 404 default: 405 zap.L().Debug("grpc: unsupported action by the K8s monitor", zap.String("action", containerArgs.Action().String())) 406 return &monitorpb.ContainerEventResponse{ 407 ErrorMessage: "unexpected action received: " + containerArgs.Action().String(), 408 }, nil 409 } 410 411 return &monitorpb.ContainerEventResponse{}, nil 412 } 413 414 // log an error if we can't find it because we should always be able to find it, and this is an error in the extractor 415 zap.L().Error("grpc: ContainerEvent: container not found", zap.String("containerID", containerArgs.ID()), zap.String("action", containerArgs.Action().String())) 416 return &monitorpb.ContainerEventResponse{ 417 ErrorMessage: "container not found", 418 }, nil 419 } 420 421 // SenderName must return a globally unique name of the implementor. 422 func (s *Server) SenderName() string { 423 return constants.MonitorExtSenderName 424 } 425 426 // Register will register the given `monitor` for receiving events under `name`. 427 // Multiple calls to this function for the same `name` must update the internal 428 // state of the implementor to now send events to the newly regitered monitor of this 429 // name. Only one registration of a monitor of the same name is allowed. 430 func (s *Server) Register(name string, monitor external.ReceiveEvents) error { 431 s.monitorsLock.Lock() 432 defer s.monitorsLock.Unlock() 433 s.monitors[name] = monitor 434 s.notifyProcessRuncProxyStartedCh <- struct{}{} 435 return nil 436 }