github.com/psyb0t/mattermost-server@v4.6.1-0.20180125161845-5503a1351abf+incompatible/app/web_hub.go (about) 1 // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved. 2 // See License.txt for license information. 3 4 package app 5 6 import ( 7 "fmt" 8 "hash/fnv" 9 "runtime" 10 "runtime/debug" 11 "strconv" 12 "strings" 13 "sync/atomic" 14 "time" 15 16 l4g "github.com/alecthomas/log4go" 17 18 "github.com/mattermost/mattermost-server/model" 19 "github.com/mattermost/mattermost-server/utils" 20 ) 21 22 const ( 23 BROADCAST_QUEUE_SIZE = 4096 24 DEADLOCK_TICKER = 15 * time.Second // check every 15 seconds 25 DEADLOCK_WARN = (BROADCAST_QUEUE_SIZE * 99) / 100 // number of buffered messages before printing stack trace 26 ) 27 28 type Hub struct { 29 // connectionCount should be kept first. 30 // See https://github.com/mattermost/mattermost-server/pull/7281 31 connectionCount int64 32 app *App 33 connections []*WebConn 34 connectionIndex int 35 register chan *WebConn 36 unregister chan *WebConn 37 broadcast chan *model.WebSocketEvent 38 stop chan struct{} 39 didStop chan struct{} 40 invalidateUser chan string 41 ExplicitStop bool 42 goroutineId int 43 } 44 45 func (a *App) NewWebHub() *Hub { 46 return &Hub{ 47 app: a, 48 register: make(chan *WebConn, 1), 49 unregister: make(chan *WebConn, 1), 50 connections: make([]*WebConn, 0, model.SESSION_CACHE_SIZE), 51 broadcast: make(chan *model.WebSocketEvent, BROADCAST_QUEUE_SIZE), 52 stop: make(chan struct{}), 53 didStop: make(chan struct{}), 54 invalidateUser: make(chan string), 55 ExplicitStop: false, 56 } 57 } 58 59 func (a *App) TotalWebsocketConnections() int { 60 count := int64(0) 61 for _, hub := range a.Hubs { 62 count = count + atomic.LoadInt64(&hub.connectionCount) 63 } 64 65 return int(count) 66 } 67 68 func (a *App) HubStart() { 69 // Total number of hubs is twice the number of CPUs. 70 numberOfHubs := runtime.NumCPU() * 2 71 l4g.Info(utils.T("api.web_hub.start.starting.debug"), numberOfHubs) 72 73 a.Hubs = make([]*Hub, numberOfHubs) 74 a.HubsStopCheckingForDeadlock = make(chan bool, 1) 75 76 for i := 0; i < len(a.Hubs); i++ { 77 a.Hubs[i] = a.NewWebHub() 78 a.Hubs[i].connectionIndex = i 79 a.Hubs[i].Start() 80 } 81 82 go func() { 83 ticker := time.NewTicker(DEADLOCK_TICKER) 84 85 defer func() { 86 ticker.Stop() 87 }() 88 89 for { 90 select { 91 case <-ticker.C: 92 for _, hub := range a.Hubs { 93 if len(hub.broadcast) >= DEADLOCK_WARN { 94 l4g.Error("Hub processing might be deadlock on hub %v goroutine %v with %v events in the buffer", hub.connectionIndex, hub.goroutineId, len(hub.broadcast)) 95 buf := make([]byte, 1<<16) 96 runtime.Stack(buf, true) 97 output := fmt.Sprintf("%s", buf) 98 splits := strings.Split(output, "goroutine ") 99 100 for _, part := range splits { 101 if strings.Contains(part, fmt.Sprintf("%v", hub.goroutineId)) { 102 l4g.Error("Trace for possible deadlock goroutine %v", part) 103 } 104 } 105 } 106 } 107 108 case <-a.HubsStopCheckingForDeadlock: 109 return 110 } 111 } 112 }() 113 } 114 115 func (a *App) HubStop() { 116 l4g.Info(utils.T("api.web_hub.start.stopping.debug")) 117 118 select { 119 case a.HubsStopCheckingForDeadlock <- true: 120 default: 121 l4g.Warn("We appear to have already sent the stop checking for deadlocks command") 122 } 123 124 for _, hub := range a.Hubs { 125 hub.Stop() 126 } 127 128 a.Hubs = []*Hub{} 129 } 130 131 func (a *App) GetHubForUserId(userId string) *Hub { 132 hash := fnv.New32a() 133 hash.Write([]byte(userId)) 134 index := hash.Sum32() % uint32(len(a.Hubs)) 135 return a.Hubs[index] 136 } 137 138 func (a *App) HubRegister(webConn *WebConn) { 139 a.GetHubForUserId(webConn.UserId).Register(webConn) 140 } 141 142 func (a *App) HubUnregister(webConn *WebConn) { 143 a.GetHubForUserId(webConn.UserId).Unregister(webConn) 144 } 145 146 func (a *App) Publish(message *model.WebSocketEvent) { 147 if metrics := a.Metrics; metrics != nil { 148 metrics.IncrementWebsocketEvent(message.Event) 149 } 150 151 a.PublishSkipClusterSend(message) 152 153 if a.Cluster != nil { 154 cm := &model.ClusterMessage{ 155 Event: model.CLUSTER_EVENT_PUBLISH, 156 SendType: model.CLUSTER_SEND_BEST_EFFORT, 157 Data: message.ToJson(), 158 } 159 160 if message.Event == model.WEBSOCKET_EVENT_POSTED || 161 message.Event == model.WEBSOCKET_EVENT_POST_EDITED || 162 message.Event == model.WEBSOCKET_EVENT_DIRECT_ADDED || 163 message.Event == model.WEBSOCKET_EVENT_GROUP_ADDED || 164 message.Event == model.WEBSOCKET_EVENT_ADDED_TO_TEAM { 165 cm.SendType = model.CLUSTER_SEND_RELIABLE 166 } 167 168 a.Cluster.SendClusterMessage(cm) 169 } 170 } 171 172 func (a *App) PublishSkipClusterSend(message *model.WebSocketEvent) { 173 for _, hub := range a.Hubs { 174 hub.Broadcast(message) 175 } 176 } 177 178 func (a *App) InvalidateCacheForChannel(channel *model.Channel) { 179 a.InvalidateCacheForChannelSkipClusterSend(channel.Id) 180 a.InvalidateCacheForChannelByNameSkipClusterSend(channel.TeamId, channel.Name) 181 182 if a.Cluster != nil { 183 msg := &model.ClusterMessage{ 184 Event: model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL, 185 SendType: model.CLUSTER_SEND_BEST_EFFORT, 186 Data: channel.Id, 187 } 188 189 a.Cluster.SendClusterMessage(msg) 190 191 nameMsg := &model.ClusterMessage{ 192 Event: model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL_BY_NAME, 193 SendType: model.CLUSTER_SEND_BEST_EFFORT, 194 Props: make(map[string]string), 195 } 196 197 nameMsg.Props["name"] = channel.Name 198 if channel.TeamId == "" { 199 nameMsg.Props["id"] = "dm" 200 } else { 201 nameMsg.Props["id"] = channel.TeamId 202 } 203 204 a.Cluster.SendClusterMessage(nameMsg) 205 } 206 } 207 208 func (a *App) InvalidateCacheForChannelSkipClusterSend(channelId string) { 209 a.Srv.Store.Channel().InvalidateChannel(channelId) 210 } 211 212 func (a *App) InvalidateCacheForChannelMembers(channelId string) { 213 a.InvalidateCacheForChannelMembersSkipClusterSend(channelId) 214 215 if a.Cluster != nil { 216 msg := &model.ClusterMessage{ 217 Event: model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL_MEMBERS, 218 SendType: model.CLUSTER_SEND_BEST_EFFORT, 219 Data: channelId, 220 } 221 a.Cluster.SendClusterMessage(msg) 222 } 223 } 224 225 func (a *App) InvalidateCacheForChannelMembersSkipClusterSend(channelId string) { 226 a.Srv.Store.User().InvalidateProfilesInChannelCache(channelId) 227 a.Srv.Store.Channel().InvalidateMemberCount(channelId) 228 } 229 230 func (a *App) InvalidateCacheForChannelMembersNotifyProps(channelId string) { 231 a.InvalidateCacheForChannelMembersNotifyPropsSkipClusterSend(channelId) 232 233 if a.Cluster != nil { 234 msg := &model.ClusterMessage{ 235 Event: model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL_MEMBERS_NOTIFY_PROPS, 236 SendType: model.CLUSTER_SEND_BEST_EFFORT, 237 Data: channelId, 238 } 239 a.Cluster.SendClusterMessage(msg) 240 } 241 } 242 243 func (a *App) InvalidateCacheForChannelMembersNotifyPropsSkipClusterSend(channelId string) { 244 a.Srv.Store.Channel().InvalidateCacheForChannelMembersNotifyProps(channelId) 245 } 246 247 func (a *App) InvalidateCacheForChannelByNameSkipClusterSend(teamId, name string) { 248 if teamId == "" { 249 teamId = "dm" 250 } 251 252 a.Srv.Store.Channel().InvalidateChannelByName(teamId, name) 253 } 254 255 func (a *App) InvalidateCacheForChannelPosts(channelId string) { 256 a.InvalidateCacheForChannelPostsSkipClusterSend(channelId) 257 258 if a.Cluster != nil { 259 msg := &model.ClusterMessage{ 260 Event: model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL_POSTS, 261 SendType: model.CLUSTER_SEND_BEST_EFFORT, 262 Data: channelId, 263 } 264 a.Cluster.SendClusterMessage(msg) 265 } 266 } 267 268 func (a *App) InvalidateCacheForChannelPostsSkipClusterSend(channelId string) { 269 a.Srv.Store.Post().InvalidateLastPostTimeCache(channelId) 270 } 271 272 func (a *App) InvalidateCacheForUser(userId string) { 273 a.InvalidateCacheForUserSkipClusterSend(userId) 274 275 if a.Cluster != nil { 276 msg := &model.ClusterMessage{ 277 Event: model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_USER, 278 SendType: model.CLUSTER_SEND_BEST_EFFORT, 279 Data: userId, 280 } 281 a.Cluster.SendClusterMessage(msg) 282 } 283 } 284 285 func (a *App) InvalidateCacheForUserSkipClusterSend(userId string) { 286 a.Srv.Store.Channel().InvalidateAllChannelMembersForUser(userId) 287 a.Srv.Store.User().InvalidateProfilesInChannelCacheByUser(userId) 288 a.Srv.Store.User().InvalidatProfileCacheForUser(userId) 289 290 if len(a.Hubs) != 0 { 291 a.GetHubForUserId(userId).InvalidateUser(userId) 292 } 293 } 294 295 func (a *App) InvalidateCacheForWebhook(webhookId string) { 296 a.InvalidateCacheForWebhookSkipClusterSend(webhookId) 297 298 if a.Cluster != nil { 299 msg := &model.ClusterMessage{ 300 Event: model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_WEBHOOK, 301 SendType: model.CLUSTER_SEND_BEST_EFFORT, 302 Data: webhookId, 303 } 304 a.Cluster.SendClusterMessage(msg) 305 } 306 } 307 308 func (a *App) InvalidateCacheForWebhookSkipClusterSend(webhookId string) { 309 a.Srv.Store.Webhook().InvalidateWebhookCache(webhookId) 310 } 311 312 func (a *App) InvalidateWebConnSessionCacheForUser(userId string) { 313 if len(a.Hubs) != 0 { 314 a.GetHubForUserId(userId).InvalidateUser(userId) 315 } 316 } 317 318 func (h *Hub) Register(webConn *WebConn) { 319 h.register <- webConn 320 321 if webConn.IsAuthenticated() { 322 webConn.SendHello() 323 } 324 } 325 326 func (h *Hub) Unregister(webConn *WebConn) { 327 select { 328 case h.unregister <- webConn: 329 case <-h.stop: 330 } 331 } 332 333 func (h *Hub) Broadcast(message *model.WebSocketEvent) { 334 if message != nil { 335 h.broadcast <- message 336 } 337 } 338 339 func (h *Hub) InvalidateUser(userId string) { 340 h.invalidateUser <- userId 341 } 342 343 func getGoroutineId() int { 344 var buf [64]byte 345 n := runtime.Stack(buf[:], false) 346 idField := strings.Fields(strings.TrimPrefix(string(buf[:n]), "goroutine "))[0] 347 id, err := strconv.Atoi(idField) 348 if err != nil { 349 id = -1 350 } 351 return id 352 } 353 354 func (h *Hub) Stop() { 355 close(h.stop) 356 <-h.didStop 357 } 358 359 func (h *Hub) Start() { 360 var doStart func() 361 var doRecoverableStart func() 362 var doRecover func() 363 364 doStart = func() { 365 366 h.goroutineId = getGoroutineId() 367 l4g.Debug("Hub for index %v is starting with goroutine %v", h.connectionIndex, h.goroutineId) 368 369 for { 370 select { 371 case webCon := <-h.register: 372 h.connections = append(h.connections, webCon) 373 atomic.StoreInt64(&h.connectionCount, int64(len(h.connections))) 374 375 case webCon := <-h.unregister: 376 userId := webCon.UserId 377 378 found := false 379 indexToDel := -1 380 for i, webConCandidate := range h.connections { 381 if webConCandidate == webCon { 382 indexToDel = i 383 continue 384 } 385 if userId == webConCandidate.UserId { 386 found = true 387 if indexToDel != -1 { 388 break 389 } 390 } 391 } 392 393 if indexToDel != -1 { 394 // Delete the webcon we are unregistering 395 h.connections[indexToDel] = h.connections[len(h.connections)-1] 396 h.connections = h.connections[:len(h.connections)-1] 397 } 398 399 if len(userId) == 0 { 400 continue 401 } 402 403 if !found { 404 h.app.Go(func() { 405 h.app.SetStatusOffline(userId, false) 406 }) 407 } 408 409 case userId := <-h.invalidateUser: 410 for _, webCon := range h.connections { 411 if webCon.UserId == userId { 412 webCon.InvalidateCache() 413 } 414 } 415 416 case msg := <-h.broadcast: 417 for _, webCon := range h.connections { 418 if webCon.ShouldSendEvent(msg) { 419 select { 420 case webCon.Send <- msg: 421 default: 422 l4g.Error(fmt.Sprintf("webhub.broadcast: cannot send, closing websocket for userId=%v", webCon.UserId)) 423 close(webCon.Send) 424 for i, webConCandidate := range h.connections { 425 if webConCandidate == webCon { 426 h.connections[i] = h.connections[len(h.connections)-1] 427 h.connections = h.connections[:len(h.connections)-1] 428 break 429 } 430 } 431 } 432 } 433 } 434 435 case <-h.stop: 436 userIds := make(map[string]bool) 437 438 for _, webCon := range h.connections { 439 userIds[webCon.UserId] = true 440 webCon.Close() 441 } 442 443 for userId := range userIds { 444 h.app.SetStatusOffline(userId, false) 445 } 446 447 h.connections = make([]*WebConn, 0, model.SESSION_CACHE_SIZE) 448 h.ExplicitStop = true 449 close(h.didStop) 450 451 return 452 } 453 } 454 } 455 456 doRecoverableStart = func() { 457 defer doRecover() 458 doStart() 459 } 460 461 doRecover = func() { 462 if !h.ExplicitStop { 463 if r := recover(); r != nil { 464 l4g.Error(fmt.Sprintf("Recovering from Hub panic. Panic was: %v", r)) 465 } else { 466 l4g.Error("Webhub stopped unexpectedly. Recovering.") 467 } 468 469 l4g.Error(string(debug.Stack())) 470 471 go doRecoverableStart() 472 } 473 } 474 475 go doRecoverableStart() 476 }