github.com/jingruilea/kubeedge@v1.2.0-beta.0.0.20200410162146-4bb8902b3879/cloud/pkg/cloudhub/handler/messagehandler.go (about) 1 package handler 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "regexp" 7 "strings" 8 "sync" 9 "time" 10 11 apierrors "k8s.io/apimachinery/pkg/api/errors" 12 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13 "k8s.io/client-go/tools/cache" 14 "k8s.io/client-go/util/workqueue" 15 "k8s.io/klog" 16 17 beehiveContext "github.com/kubeedge/beehive/pkg/core/context" 18 beehiveModel "github.com/kubeedge/beehive/pkg/core/model" 19 "github.com/kubeedge/kubeedge/cloud/pkg/apis/reliablesyncs/v1alpha1" 20 "github.com/kubeedge/kubeedge/cloud/pkg/cloudhub/channelq" 21 hubio "github.com/kubeedge/kubeedge/cloud/pkg/cloudhub/common/io" 22 "github.com/kubeedge/kubeedge/cloud/pkg/cloudhub/common/model" 23 hubconfig "github.com/kubeedge/kubeedge/cloud/pkg/cloudhub/config" 24 deviceconst "github.com/kubeedge/kubeedge/cloud/pkg/devicecontroller/constants" 25 edgeconst "github.com/kubeedge/kubeedge/cloud/pkg/edgecontroller/constants" 26 edgemessagelayer "github.com/kubeedge/kubeedge/cloud/pkg/edgecontroller/messagelayer" 27 "github.com/kubeedge/kubeedge/cloud/pkg/synccontroller" 28 "github.com/kubeedge/kubeedge/common/constants" 29 "github.com/kubeedge/viaduct/pkg/conn" 30 "github.com/kubeedge/viaduct/pkg/mux" 31 ) 32 33 // ExitCode exit code 34 type ExitCode int 35 36 const ( 37 hubioReadFail ExitCode = iota 38 hubioWriteFail 39 messageQueueDisconnect 40 nodeStop 41 nodeDisconnect 42 ) 43 44 // constants for error message 45 const ( 46 MsgFormatError = "message format not correct" 47 VolumePattern = `^\w[-\w.+]*/` + constants.CSIResourceTypeVolume + `/\w[-\w.+]*` 48 ) 49 50 // VolumeRegExp is used to validate the volume resource 51 var VolumeRegExp = regexp.MustCompile(VolumePattern) 52 53 // MessageHandle processes messages between cloud and edge 54 type MessageHandle struct { 55 KeepaliveInterval int 56 WriteTimeout int 57 Nodes sync.Map 58 nodeConns sync.Map 59 nodeLocks sync.Map 60 MessageQueue *channelq.ChannelMessageQueue 61 Handlers []HandleFunc 62 NodeLimit int 63 KeepaliveChannel map[string]chan struct{} 64 MessageAcks sync.Map 65 } 66 67 type HandleFunc func(hi hubio.CloudHubIO, info *model.HubInfo, exitServe chan ExitCode, stopSendMsg chan struct{}) 68 69 var once sync.Once 70 71 // CloudhubHandler the shared handler for both websocket and quic servers 72 var CloudhubHandler *MessageHandle 73 74 // InitHandler create a handler for websocket and quic servers 75 func InitHandler(eventq *channelq.ChannelMessageQueue) { 76 once.Do(func() { 77 CloudhubHandler = &MessageHandle{ 78 KeepaliveInterval: int(hubconfig.Config.KeepaliveInterval), 79 WriteTimeout: int(hubconfig.Config.WriteTimeout), 80 MessageQueue: eventq, 81 NodeLimit: int(hubconfig.Config.NodeLimit), 82 } 83 84 CloudhubHandler.KeepaliveChannel = make(map[string]chan struct{}) 85 CloudhubHandler.Handlers = []HandleFunc{ 86 CloudhubHandler.KeepaliveCheckLoop, 87 CloudhubHandler.MessageWriteLoop, 88 CloudhubHandler.ListMessageWriteLoop, 89 } 90 91 CloudhubHandler.initServerEntries() 92 }) 93 } 94 95 // initServerEntries register handler func 96 func (mh *MessageHandle) initServerEntries() { 97 mux.Entry(mux.NewPattern("*").Op("*"), mh.HandleServer) 98 } 99 100 // HandleServer handle all the request from node 101 func (mh *MessageHandle) HandleServer(container *mux.MessageContainer, writer mux.ResponseWriter) { 102 nodeID := container.Header.Get("node_id") 103 projectID := container.Header.Get("project_id") 104 105 if mh.GetNodeCount() >= mh.NodeLimit { 106 klog.Errorf("Fail to serve node %s, reach node limit", nodeID) 107 return 108 } 109 110 if container.Message.GetOperation() == model.OpKeepalive { 111 klog.Infof("Keepalive message received from node: %s", nodeID) 112 mh.KeepaliveChannel[nodeID] <- struct{}{} 113 return 114 } 115 116 // handle the reponse from edge 117 if VolumeRegExp.MatchString(container.Message.GetResource()) { 118 beehiveContext.SendResp(*container.Message) 119 return 120 } 121 122 // handle the ack message from edge 123 if container.Message.Router.Operation == beehiveModel.ResponseOperation { 124 if ackChan, ok := mh.MessageAcks.Load(container.Message.Header.ParentID); ok { 125 close(ackChan.(chan struct{})) 126 mh.MessageAcks.Delete(container.Message.Header.ParentID) 127 } 128 return 129 } 130 131 err := mh.PubToController(&model.HubInfo{ProjectID: projectID, NodeID: nodeID}, container.Message) 132 if err != nil { 133 // if err, we should stop node, write data to edgehub, stop nodify 134 klog.Errorf("Failed to serve handle with error: %s", err.Error()) 135 } 136 } 137 138 // OnRegister register node on first connection 139 func (mh *MessageHandle) OnRegister(connection conn.Connection) { 140 nodeID := connection.ConnectionState().Headers.Get("node_id") 141 projectID := connection.ConnectionState().Headers.Get("project_id") 142 143 if _, ok := mh.KeepaliveChannel[nodeID]; !ok { 144 mh.KeepaliveChannel[nodeID] = make(chan struct{}, 1) 145 } 146 147 io := &hubio.JSONIO{Connection: connection} 148 go mh.ServeConn(io, &model.HubInfo{ProjectID: projectID, NodeID: nodeID}) 149 } 150 151 // KeepaliveCheckLoop checks whether the edge node is still alive 152 func (mh *MessageHandle) KeepaliveCheckLoop(hi hubio.CloudHubIO, info *model.HubInfo, stopServe chan ExitCode, stopSendMsg chan struct{}) { 153 keepaliveTicker := time.NewTimer(time.Duration(mh.KeepaliveInterval) * time.Second) 154 for { 155 select { 156 case _, ok := <-mh.KeepaliveChannel[info.NodeID]: 157 if !ok { 158 return 159 } 160 klog.Infof("Node %s is still alive", info.NodeID) 161 keepaliveTicker.Reset(time.Duration(mh.KeepaliveInterval) * time.Second) 162 case <-keepaliveTicker.C: 163 klog.Warningf("Timeout to receive heart beat from edge node %s for project %s", 164 info.NodeID, info.ProjectID) 165 stopServe <- nodeDisconnect 166 close(stopSendMsg) 167 return 168 } 169 } 170 } 171 172 func dumpMessageMetadata(msg *beehiveModel.Message) string { 173 return fmt.Sprintf("id: %s, parent_id: %s, group: %s, source: %s, resource: %s, operation: %s", 174 msg.Header.ID, msg.Header.ParentID, msg.Router.Group, msg.Router.Source, msg.Router.Resource, msg.Router.Operation) 175 } 176 177 func trimMessage(msg *beehiveModel.Message) { 178 resource := msg.GetResource() 179 if strings.HasPrefix(resource, model.ResNode) { 180 tokens := strings.Split(resource, "/") 181 if len(tokens) < 3 { 182 klog.Warningf("event resource %s starts with node but length less than 3", resource) 183 } else { 184 msg.SetResourceOperation(strings.Join(tokens[2:], "/"), msg.GetOperation()) 185 } 186 } 187 } 188 189 func notifyEventQueueError(hi hubio.CloudHubIO, code ExitCode, nodeID string) { 190 if code == messageQueueDisconnect { 191 msg := beehiveModel.NewMessage("").BuildRouter(model.GpResource, model.SrcCloudHub, model.NewResource(model.ResNode, nodeID, nil), model.OpDisConnect) 192 err := hi.WriteData(msg) 193 if err != nil { 194 klog.Errorf("fail to notify node %s event queue disconnected, reason: %s", nodeID, err.Error()) 195 } 196 } 197 } 198 199 func constructConnectMessage(info *model.HubInfo, isConnected bool) *beehiveModel.Message { 200 connected := model.OpConnect 201 if !isConnected { 202 connected = model.OpDisConnect 203 } 204 body := map[string]interface{}{ 205 "event_type": connected, 206 "timestamp": time.Now().Unix(), 207 "client_id": info.NodeID} 208 content, _ := json.Marshal(body) 209 210 msg := beehiveModel.NewMessage("") 211 msg.BuildRouter(model.SrcCloudHub, model.GpResource, model.NewResource(model.ResNode, info.NodeID, nil), connected) 212 msg.FillBody(content) 213 return msg 214 } 215 216 func (mh *MessageHandle) PubToController(info *model.HubInfo, msg *beehiveModel.Message) error { 217 msg.SetResourceOperation(fmt.Sprintf("node/%s/%s", info.NodeID, msg.GetResource()), msg.GetOperation()) 218 klog.Infof("event received for node %s %s, content: %s", info.NodeID, dumpMessageMetadata(msg), msg.Content) 219 if model.IsFromEdge(msg) { 220 err := mh.MessageQueue.Publish(msg) 221 if err != nil { 222 // content is not logged since it may contain sensitive information 223 klog.Errorf("fail to publish event for node %s, %s, reason: %s", 224 info.NodeID, dumpMessageMetadata(msg), err.Error()) 225 return err 226 } 227 } 228 return nil 229 } 230 231 func (mh *MessageHandle) hubIoWrite(hi hubio.CloudHubIO, nodeID string, msg *beehiveModel.Message) error { 232 value, ok := mh.nodeLocks.Load(nodeID) 233 if !ok { 234 return fmt.Errorf("node disconnected") 235 } 236 mutex := value.(*sync.Mutex) 237 mutex.Lock() 238 defer mutex.Unlock() 239 240 return hi.WriteData(msg) 241 } 242 243 // ServeConn starts serving the incoming connection 244 func (mh *MessageHandle) ServeConn(hi hubio.CloudHubIO, info *model.HubInfo) { 245 err := mh.RegisterNode(hi, info) 246 if err != nil { 247 klog.Errorf("fail to register node %s, reason %s", info.NodeID, err.Error()) 248 return 249 } 250 251 klog.Infof("edge node %s for project %s connected", info.NodeID, info.ProjectID) 252 exitServe := make(chan ExitCode, 3) 253 stopSendMsg := make(chan struct{}) 254 255 for _, handle := range mh.Handlers { 256 go handle(hi, info, exitServe, stopSendMsg) 257 } 258 259 code := <-exitServe 260 mh.UnregisterNode(hi, info, code) 261 } 262 263 // RegisterNode register node in cloudhub for the incoming connection 264 func (mh *MessageHandle) RegisterNode(hi hubio.CloudHubIO, info *model.HubInfo) error { 265 mh.MessageQueue.Connect(info) 266 267 err := mh.MessageQueue.Publish(constructConnectMessage(info, true)) 268 if err != nil { 269 klog.Errorf("fail to publish node connect event for node %s, reason %s", info.NodeID, err.Error()) 270 notifyEventQueueError(hi, messageQueueDisconnect, info.NodeID) 271 err = hi.Close() 272 if err != nil { 273 klog.Errorf("fail to close connection, reason: %s", err.Error()) 274 } 275 return err 276 } 277 278 mh.nodeConns.Store(info.NodeID, hi) 279 mh.nodeLocks.Store(info.NodeID, &sync.Mutex{}) 280 mh.Nodes.Store(info.NodeID, true) 281 return nil 282 } 283 284 // UnregisterNode unregister node in cloudhub 285 func (mh *MessageHandle) UnregisterNode(hi hubio.CloudHubIO, info *model.HubInfo, code ExitCode) { 286 mh.nodeLocks.Delete(info.NodeID) 287 mh.nodeConns.Delete(info.NodeID) 288 close(mh.KeepaliveChannel[info.NodeID]) 289 delete(mh.KeepaliveChannel, info.NodeID) 290 291 err := mh.MessageQueue.Publish(constructConnectMessage(info, false)) 292 if err != nil { 293 klog.Errorf("fail to publish node disconnect event for node %s, reason %s", info.NodeID, err.Error()) 294 } 295 notifyEventQueueError(hi, code, info.NodeID) 296 mh.Nodes.Delete(info.NodeID) 297 err = hi.Close() 298 if err != nil { 299 klog.Errorf("fail to close connection, reason: %s", err.Error()) 300 } 301 302 // delete the nodeQueue and nodeStore when node stopped 303 if code == nodeStop { 304 mh.MessageQueue.Close(info) 305 } 306 } 307 308 // GetNodeCount returns the number of connected Nodes 309 func (mh *MessageHandle) GetNodeCount() int { 310 var num int 311 iter := func(key, value interface{}) bool { 312 num++ 313 return true 314 } 315 mh.Nodes.Range(iter) 316 return num 317 } 318 319 // ListMessageWriteLoop processes all list type resource write requests 320 func (mh *MessageHandle) ListMessageWriteLoop(hi hubio.CloudHubIO, info *model.HubInfo, stopServe chan ExitCode, stopSendMsg chan struct{}) { 321 nodeListQueue, err := mh.MessageQueue.GetNodeListQueue(info.NodeID) 322 if err != nil { 323 klog.Errorf("Failed to get nodeQueue for node %s: %v", info.NodeID, err) 324 stopServe <- messageQueueDisconnect 325 return 326 } 327 nodeListStore, err := mh.MessageQueue.GetNodeListStore(info.NodeID) 328 if err != nil { 329 klog.Errorf("Failed to get nodeStore for node %s: %v", info.NodeID, err) 330 stopServe <- messageQueueDisconnect 331 return 332 } 333 for { 334 select { 335 case <-stopSendMsg: 336 klog.Errorf("Node %s disconnected and stopped sending messages", info.NodeID) 337 return 338 default: 339 mh.handleMessage(nodeListQueue, nodeListStore, hi, info, stopServe, "listMessage") 340 } 341 } 342 } 343 344 // MessageWriteLoop processes all write requests 345 func (mh *MessageHandle) MessageWriteLoop(hi hubio.CloudHubIO, info *model.HubInfo, stopServe chan ExitCode, stopSendMsg chan struct{}) { 346 nodeQueue, err := mh.MessageQueue.GetNodeQueue(info.NodeID) 347 if err != nil { 348 klog.Errorf("Failed to get nodeQueue for node %s: %v", info.NodeID, err) 349 stopServe <- messageQueueDisconnect 350 return 351 } 352 nodeStore, err := mh.MessageQueue.GetNodeStore(info.NodeID) 353 if err != nil { 354 klog.Errorf("Failed to get nodeStore for node %s: %v", info.NodeID, err) 355 stopServe <- messageQueueDisconnect 356 return 357 } 358 359 for { 360 select { 361 case <-stopSendMsg: 362 klog.Errorf("Node %s disconnected and stopped sending messages", info.NodeID) 363 return 364 default: 365 mh.handleMessage(nodeQueue, nodeStore, hi, info, stopServe, "message") 366 } 367 } 368 } 369 370 func (mh *MessageHandle) handleMessage(nodeQueue workqueue.RateLimitingInterface, 371 nodeStore cache.Store, hi hubio.CloudHubIO, 372 info *model.HubInfo, stopServe chan ExitCode, msgType string) { 373 key, quit := nodeQueue.Get() 374 if quit { 375 klog.Errorf("nodeQueue for node %s has shutdown", info.NodeID) 376 return 377 } 378 obj, exist, _ := nodeStore.GetByKey(key.(string)) 379 if !exist { 380 klog.Errorf("nodeStore for node %s doesn't exist", info.NodeID) 381 return 382 } 383 384 msg := obj.(*beehiveModel.Message) 385 386 if model.IsNodeStopped(msg) { 387 klog.Infof("node %s is stopped, will disconnect", info.NodeID) 388 stopServe <- nodeStop 389 return 390 } 391 if !model.IsToEdge(msg) { 392 klog.Infof("skip only to cloud event for node %s, %s, content %s", info.NodeID, dumpMessageMetadata(msg), msg.Content) 393 return 394 } 395 klog.V(4).Infof("event to send for node %s, %s, content %s", info.NodeID, dumpMessageMetadata(msg), msg.Content) 396 397 copyMsg := deepcopy(msg) 398 trimMessage(msg) 399 err := hi.SetWriteDeadline(time.Now().Add(time.Duration(mh.WriteTimeout) * time.Second)) 400 if err != nil { 401 klog.Errorf("SetWriteDeadline error, %s", err.Error()) 402 stopServe <- hubioWriteFail 403 return 404 } 405 if msgType == "listMessage" { 406 mh.send(hi, info, msg) 407 // delete successfully sent events from the queue/store 408 nodeStore.Delete(msg) 409 } else { 410 mh.sendMsg(hi, info, msg, copyMsg, nodeStore) 411 } 412 413 nodeQueue.Done(key) 414 } 415 416 func (mh *MessageHandle) sendMsg(hi hubio.CloudHubIO, info *model.HubInfo, msg, copyMsg *beehiveModel.Message, nodeStore cache.Store) { 417 ackChan := make(chan struct{}) 418 mh.MessageAcks.Store(msg.GetID(), ackChan) 419 420 // initialize timer and retry count for sending message 421 var ( 422 retry = 0 423 retryInterval time.Duration = 5 424 ) 425 ticker := time.NewTimer(retryInterval * time.Second) 426 mh.send(hi, info, msg) 427 428 LOOP: 429 for { 430 select { 431 case <-ackChan: 432 mh.saveSuccessPoint(copyMsg, info, nodeStore) 433 break LOOP 434 case <-ticker.C: 435 if retry == 4 { 436 break LOOP 437 } 438 mh.send(hi, info, msg) 439 retry++ 440 ticker.Reset(time.Second * retryInterval) 441 } 442 } 443 } 444 445 func (mh *MessageHandle) send(hi hubio.CloudHubIO, info *model.HubInfo, msg *beehiveModel.Message) { 446 err := mh.hubIoWrite(hi, info.NodeID, msg) 447 if err != nil { 448 klog.Errorf("write error, connection for node %s will be closed, affected event %s, reason %s", 449 info.NodeID, dumpMessageMetadata(msg), err.Error()) 450 return 451 } 452 } 453 454 func (mh *MessageHandle) saveSuccessPoint(msg *beehiveModel.Message, info *model.HubInfo, nodeStore cache.Store) { 455 if msg.GetGroup() == edgeconst.GroupResource { 456 resourceNamespace, _ := edgemessagelayer.GetNamespace(*msg) 457 resourceName, _ := edgemessagelayer.GetResourceName(*msg) 458 resourceType, _ := edgemessagelayer.GetResourceType(*msg) 459 resourceUID, err := channelq.GetMessageUID(*msg) 460 if err != nil { 461 return 462 } 463 464 objectSyncName := synccontroller.BuildObjectSyncName(info.NodeID, resourceUID) 465 466 if msg.GetOperation() == beehiveModel.DeleteOperation { 467 nodeStore.Delete(msg) 468 mh.deleteSuccessPoint(resourceNamespace, objectSyncName) 469 return 470 } 471 472 objectSync, err := mh.MessageQueue.ObjectSyncController.CrdClient.ReliablesyncsV1alpha1().ObjectSyncs(resourceNamespace).Get(objectSyncName, metav1.GetOptions{}) 473 if err == nil { 474 objectSync.Status.ObjectResourceVersion = msg.GetResourceVersion() 475 _, err := mh.MessageQueue.ObjectSyncController.CrdClient.ReliablesyncsV1alpha1().ObjectSyncs(resourceNamespace).UpdateStatus(objectSync) 476 if err != nil { 477 klog.Errorf("Failed to update objectSync: %v, resourceType: %s, resourceNamespace: %s, resourceName: %s", 478 err, resourceType, resourceNamespace, resourceName) 479 } 480 } else if err != nil && apierrors.IsNotFound(err) { 481 objectSync := &v1alpha1.ObjectSync{ 482 ObjectMeta: metav1.ObjectMeta{ 483 Name: objectSyncName, 484 }, 485 Spec: v1alpha1.ObjectSyncSpec{ 486 ObjectAPIVersion: "", 487 ObjectKind: resourceType, 488 ObjectName: resourceName, 489 }, 490 } 491 _, err := mh.MessageQueue.ObjectSyncController.CrdClient.ReliablesyncsV1alpha1().ObjectSyncs(resourceNamespace).Create(objectSync) 492 if err != nil { 493 klog.Errorf("Failed to create objectSync: %s, err: %v", objectSyncName, err) 494 return 495 } 496 497 objectSyncStatus, err := mh.MessageQueue.ObjectSyncController.CrdClient.ReliablesyncsV1alpha1().ObjectSyncs(resourceNamespace).Get(objectSyncName, metav1.GetOptions{}) 498 if err != nil { 499 klog.Errorf("Failed to get objectSync: %s, err: %v", objectSyncName, err) 500 } 501 objectSyncStatus.Status.ObjectResourceVersion = msg.GetResourceVersion() 502 mh.MessageQueue.ObjectSyncController.CrdClient.ReliablesyncsV1alpha1().ObjectSyncs(resourceNamespace).UpdateStatus(objectSyncStatus) 503 } 504 } 505 506 // TODO: save device info 507 if msg.GetGroup() == deviceconst.GroupTwin { 508 } 509 klog.Infof("saveSuccessPoint successfully for message: %s", msg.GetResource()) 510 } 511 512 func (mh *MessageHandle) deleteSuccessPoint(resourceNamespace, objectSyncName string) { 513 mh.MessageQueue.ObjectSyncController.CrdClient.ReliablesyncsV1alpha1().ObjectSyncs(resourceNamespace).Delete(objectSyncName, metav1.NewDeleteOptions(0)) 514 } 515 516 func deepcopy(msg *beehiveModel.Message) *beehiveModel.Message { 517 if msg == nil { 518 return nil 519 } 520 out := new(beehiveModel.Message) 521 out.Header = msg.Header 522 out.Router = msg.Router 523 out.Content = msg.Content 524 return out 525 }