github.com/psyb0t/mattermost-server@v4.6.1-0.20180125161845-5503a1351abf+incompatible/app/web_hub.go (about)

     1  // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
     2  // See License.txt for license information.
     3  
     4  package app
     5  
     6  import (
     7  	"fmt"
     8  	"hash/fnv"
     9  	"runtime"
    10  	"runtime/debug"
    11  	"strconv"
    12  	"strings"
    13  	"sync/atomic"
    14  	"time"
    15  
    16  	l4g "github.com/alecthomas/log4go"
    17  
    18  	"github.com/mattermost/mattermost-server/model"
    19  	"github.com/mattermost/mattermost-server/utils"
    20  )
    21  
const (
	// BROADCAST_QUEUE_SIZE is the capacity of each hub's buffered broadcast
	// channel, i.e. how many events may be queued for a hub before
	// Broadcast blocks.
	BROADCAST_QUEUE_SIZE = 4096
	// DEADLOCK_TICKER is the interval at which the watchdog started by
	// HubStart inspects every hub's broadcast queue.
	DEADLOCK_TICKER      = 15 * time.Second                  // check every 15 seconds
	// DEADLOCK_WARN is the queue length (99% of BROADCAST_QUEUE_SIZE, using
	// integer arithmetic) at or above which the watchdog logs a possible
	// deadlock together with the hub goroutine's stack trace.
	DEADLOCK_WARN        = (BROADCAST_QUEUE_SIZE * 99) / 100 // number of buffered messages before printing stack trace
)
    27  
// Hub owns a set of websocket connections and serialises every operation on
// them (register, unregister, cache invalidation, broadcast, stop) through
// channels consumed by a single goroutine launched by Start.
type Hub struct {
	// connectionCount should be kept first.
	// See https://github.com/mattermost/mattermost-server/pull/7281
	// It is written with atomic.StoreInt64 by the hub goroutine and read with
	// atomic.LoadInt64 by TotalWebsocketConnections.
	// NOTE(review): keeping it first presumably guarantees the 64-bit
	// alignment the atomic operations need on 32-bit platforms - confirm
	// against the linked pull request.
	connectionCount int64
	// app is the App that created this hub; used to set users offline.
	app             *App
	// connections is the list of registered connections. It is only touched
	// from the hub goroutine.
	connections     []*WebConn
	// connectionIndex is this hub's position in App.Hubs (assigned by HubStart).
	connectionIndex int
	// register carries connections to be added to the hub.
	register        chan *WebConn
	// unregister carries connections to be removed from the hub.
	unregister      chan *WebConn
	// broadcast queues events to be fanned out to the hub's connections.
	broadcast       chan *model.WebSocketEvent
	// stop is closed by Stop to ask the hub goroutine to shut down.
	stop            chan struct{}
	// didStop is closed by the hub goroutine once shutdown has completed.
	didStop         chan struct{}
	// invalidateUser carries user ids whose connections must drop their caches.
	invalidateUser  chan string
	// ExplicitStop is set to true when the loop exits because of Stop; the
	// recovery handler checks it so that a deliberate stop is not restarted.
	ExplicitStop    bool
	// goroutineId is the runtime id of the goroutine running the hub loop,
	// recorded for the deadlock watchdog's stack trace lookup.
	goroutineId     int
}
    44  
    45  func (a *App) NewWebHub() *Hub {
    46  	return &Hub{
    47  		app:            a,
    48  		register:       make(chan *WebConn, 1),
    49  		unregister:     make(chan *WebConn, 1),
    50  		connections:    make([]*WebConn, 0, model.SESSION_CACHE_SIZE),
    51  		broadcast:      make(chan *model.WebSocketEvent, BROADCAST_QUEUE_SIZE),
    52  		stop:           make(chan struct{}),
    53  		didStop:        make(chan struct{}),
    54  		invalidateUser: make(chan string),
    55  		ExplicitStop:   false,
    56  	}
    57  }
    58  
    59  func (a *App) TotalWebsocketConnections() int {
    60  	count := int64(0)
    61  	for _, hub := range a.Hubs {
    62  		count = count + atomic.LoadInt64(&hub.connectionCount)
    63  	}
    64  
    65  	return int(count)
    66  }
    67  
    68  func (a *App) HubStart() {
    69  	// Total number of hubs is twice the number of CPUs.
    70  	numberOfHubs := runtime.NumCPU() * 2
    71  	l4g.Info(utils.T("api.web_hub.start.starting.debug"), numberOfHubs)
    72  
    73  	a.Hubs = make([]*Hub, numberOfHubs)
    74  	a.HubsStopCheckingForDeadlock = make(chan bool, 1)
    75  
    76  	for i := 0; i < len(a.Hubs); i++ {
    77  		a.Hubs[i] = a.NewWebHub()
    78  		a.Hubs[i].connectionIndex = i
    79  		a.Hubs[i].Start()
    80  	}
    81  
    82  	go func() {
    83  		ticker := time.NewTicker(DEADLOCK_TICKER)
    84  
    85  		defer func() {
    86  			ticker.Stop()
    87  		}()
    88  
    89  		for {
    90  			select {
    91  			case <-ticker.C:
    92  				for _, hub := range a.Hubs {
    93  					if len(hub.broadcast) >= DEADLOCK_WARN {
    94  						l4g.Error("Hub processing might be deadlock on hub %v goroutine %v with %v events in the buffer", hub.connectionIndex, hub.goroutineId, len(hub.broadcast))
    95  						buf := make([]byte, 1<<16)
    96  						runtime.Stack(buf, true)
    97  						output := fmt.Sprintf("%s", buf)
    98  						splits := strings.Split(output, "goroutine ")
    99  
   100  						for _, part := range splits {
   101  							if strings.Contains(part, fmt.Sprintf("%v", hub.goroutineId)) {
   102  								l4g.Error("Trace for possible deadlock goroutine %v", part)
   103  							}
   104  						}
   105  					}
   106  				}
   107  
   108  			case <-a.HubsStopCheckingForDeadlock:
   109  				return
   110  			}
   111  		}
   112  	}()
   113  }
   114  
   115  func (a *App) HubStop() {
   116  	l4g.Info(utils.T("api.web_hub.start.stopping.debug"))
   117  
   118  	select {
   119  	case a.HubsStopCheckingForDeadlock <- true:
   120  	default:
   121  		l4g.Warn("We appear to have already sent the stop checking for deadlocks command")
   122  	}
   123  
   124  	for _, hub := range a.Hubs {
   125  		hub.Stop()
   126  	}
   127  
   128  	a.Hubs = []*Hub{}
   129  }
   130  
   131  func (a *App) GetHubForUserId(userId string) *Hub {
   132  	hash := fnv.New32a()
   133  	hash.Write([]byte(userId))
   134  	index := hash.Sum32() % uint32(len(a.Hubs))
   135  	return a.Hubs[index]
   136  }
   137  
   138  func (a *App) HubRegister(webConn *WebConn) {
   139  	a.GetHubForUserId(webConn.UserId).Register(webConn)
   140  }
   141  
   142  func (a *App) HubUnregister(webConn *WebConn) {
   143  	a.GetHubForUserId(webConn.UserId).Unregister(webConn)
   144  }
   145  
   146  func (a *App) Publish(message *model.WebSocketEvent) {
   147  	if metrics := a.Metrics; metrics != nil {
   148  		metrics.IncrementWebsocketEvent(message.Event)
   149  	}
   150  
   151  	a.PublishSkipClusterSend(message)
   152  
   153  	if a.Cluster != nil {
   154  		cm := &model.ClusterMessage{
   155  			Event:    model.CLUSTER_EVENT_PUBLISH,
   156  			SendType: model.CLUSTER_SEND_BEST_EFFORT,
   157  			Data:     message.ToJson(),
   158  		}
   159  
   160  		if message.Event == model.WEBSOCKET_EVENT_POSTED ||
   161  			message.Event == model.WEBSOCKET_EVENT_POST_EDITED ||
   162  			message.Event == model.WEBSOCKET_EVENT_DIRECT_ADDED ||
   163  			message.Event == model.WEBSOCKET_EVENT_GROUP_ADDED ||
   164  			message.Event == model.WEBSOCKET_EVENT_ADDED_TO_TEAM {
   165  			cm.SendType = model.CLUSTER_SEND_RELIABLE
   166  		}
   167  
   168  		a.Cluster.SendClusterMessage(cm)
   169  	}
   170  }
   171  
   172  func (a *App) PublishSkipClusterSend(message *model.WebSocketEvent) {
   173  	for _, hub := range a.Hubs {
   174  		hub.Broadcast(message)
   175  	}
   176  }
   177  
   178  func (a *App) InvalidateCacheForChannel(channel *model.Channel) {
   179  	a.InvalidateCacheForChannelSkipClusterSend(channel.Id)
   180  	a.InvalidateCacheForChannelByNameSkipClusterSend(channel.TeamId, channel.Name)
   181  
   182  	if a.Cluster != nil {
   183  		msg := &model.ClusterMessage{
   184  			Event:    model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL,
   185  			SendType: model.CLUSTER_SEND_BEST_EFFORT,
   186  			Data:     channel.Id,
   187  		}
   188  
   189  		a.Cluster.SendClusterMessage(msg)
   190  
   191  		nameMsg := &model.ClusterMessage{
   192  			Event:    model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL_BY_NAME,
   193  			SendType: model.CLUSTER_SEND_BEST_EFFORT,
   194  			Props:    make(map[string]string),
   195  		}
   196  
   197  		nameMsg.Props["name"] = channel.Name
   198  		if channel.TeamId == "" {
   199  			nameMsg.Props["id"] = "dm"
   200  		} else {
   201  			nameMsg.Props["id"] = channel.TeamId
   202  		}
   203  
   204  		a.Cluster.SendClusterMessage(nameMsg)
   205  	}
   206  }
   207  
   208  func (a *App) InvalidateCacheForChannelSkipClusterSend(channelId string) {
   209  	a.Srv.Store.Channel().InvalidateChannel(channelId)
   210  }
   211  
   212  func (a *App) InvalidateCacheForChannelMembers(channelId string) {
   213  	a.InvalidateCacheForChannelMembersSkipClusterSend(channelId)
   214  
   215  	if a.Cluster != nil {
   216  		msg := &model.ClusterMessage{
   217  			Event:    model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL_MEMBERS,
   218  			SendType: model.CLUSTER_SEND_BEST_EFFORT,
   219  			Data:     channelId,
   220  		}
   221  		a.Cluster.SendClusterMessage(msg)
   222  	}
   223  }
   224  
   225  func (a *App) InvalidateCacheForChannelMembersSkipClusterSend(channelId string) {
   226  	a.Srv.Store.User().InvalidateProfilesInChannelCache(channelId)
   227  	a.Srv.Store.Channel().InvalidateMemberCount(channelId)
   228  }
   229  
   230  func (a *App) InvalidateCacheForChannelMembersNotifyProps(channelId string) {
   231  	a.InvalidateCacheForChannelMembersNotifyPropsSkipClusterSend(channelId)
   232  
   233  	if a.Cluster != nil {
   234  		msg := &model.ClusterMessage{
   235  			Event:    model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL_MEMBERS_NOTIFY_PROPS,
   236  			SendType: model.CLUSTER_SEND_BEST_EFFORT,
   237  			Data:     channelId,
   238  		}
   239  		a.Cluster.SendClusterMessage(msg)
   240  	}
   241  }
   242  
   243  func (a *App) InvalidateCacheForChannelMembersNotifyPropsSkipClusterSend(channelId string) {
   244  	a.Srv.Store.Channel().InvalidateCacheForChannelMembersNotifyProps(channelId)
   245  }
   246  
   247  func (a *App) InvalidateCacheForChannelByNameSkipClusterSend(teamId, name string) {
   248  	if teamId == "" {
   249  		teamId = "dm"
   250  	}
   251  
   252  	a.Srv.Store.Channel().InvalidateChannelByName(teamId, name)
   253  }
   254  
   255  func (a *App) InvalidateCacheForChannelPosts(channelId string) {
   256  	a.InvalidateCacheForChannelPostsSkipClusterSend(channelId)
   257  
   258  	if a.Cluster != nil {
   259  		msg := &model.ClusterMessage{
   260  			Event:    model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_CHANNEL_POSTS,
   261  			SendType: model.CLUSTER_SEND_BEST_EFFORT,
   262  			Data:     channelId,
   263  		}
   264  		a.Cluster.SendClusterMessage(msg)
   265  	}
   266  }
   267  
   268  func (a *App) InvalidateCacheForChannelPostsSkipClusterSend(channelId string) {
   269  	a.Srv.Store.Post().InvalidateLastPostTimeCache(channelId)
   270  }
   271  
   272  func (a *App) InvalidateCacheForUser(userId string) {
   273  	a.InvalidateCacheForUserSkipClusterSend(userId)
   274  
   275  	if a.Cluster != nil {
   276  		msg := &model.ClusterMessage{
   277  			Event:    model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_USER,
   278  			SendType: model.CLUSTER_SEND_BEST_EFFORT,
   279  			Data:     userId,
   280  		}
   281  		a.Cluster.SendClusterMessage(msg)
   282  	}
   283  }
   284  
   285  func (a *App) InvalidateCacheForUserSkipClusterSend(userId string) {
   286  	a.Srv.Store.Channel().InvalidateAllChannelMembersForUser(userId)
   287  	a.Srv.Store.User().InvalidateProfilesInChannelCacheByUser(userId)
   288  	a.Srv.Store.User().InvalidatProfileCacheForUser(userId)
   289  
   290  	if len(a.Hubs) != 0 {
   291  		a.GetHubForUserId(userId).InvalidateUser(userId)
   292  	}
   293  }
   294  
   295  func (a *App) InvalidateCacheForWebhook(webhookId string) {
   296  	a.InvalidateCacheForWebhookSkipClusterSend(webhookId)
   297  
   298  	if a.Cluster != nil {
   299  		msg := &model.ClusterMessage{
   300  			Event:    model.CLUSTER_EVENT_INVALIDATE_CACHE_FOR_WEBHOOK,
   301  			SendType: model.CLUSTER_SEND_BEST_EFFORT,
   302  			Data:     webhookId,
   303  		}
   304  		a.Cluster.SendClusterMessage(msg)
   305  	}
   306  }
   307  
   308  func (a *App) InvalidateCacheForWebhookSkipClusterSend(webhookId string) {
   309  	a.Srv.Store.Webhook().InvalidateWebhookCache(webhookId)
   310  }
   311  
   312  func (a *App) InvalidateWebConnSessionCacheForUser(userId string) {
   313  	if len(a.Hubs) != 0 {
   314  		a.GetHubForUserId(userId).InvalidateUser(userId)
   315  	}
   316  }
   317  
   318  func (h *Hub) Register(webConn *WebConn) {
   319  	h.register <- webConn
   320  
   321  	if webConn.IsAuthenticated() {
   322  		webConn.SendHello()
   323  	}
   324  }
   325  
// Unregister queues webConn for removal from the hub. If the hub has been
// asked to stop (h.stop is closed) the request may be dropped instead of
// blocking, since nothing drains h.unregister after the hub loop exits.
func (h *Hub) Unregister(webConn *WebConn) {
	select {
	case h.unregister <- webConn:
	case <-h.stop:
	}
}
   332  
   333  func (h *Hub) Broadcast(message *model.WebSocketEvent) {
   334  	if message != nil {
   335  		h.broadcast <- message
   336  	}
   337  }
   338  
   339  func (h *Hub) InvalidateUser(userId string) {
   340  	h.invalidateUser <- userId
   341  }
   342  
   343  func getGoroutineId() int {
   344  	var buf [64]byte
   345  	n := runtime.Stack(buf[:], false)
   346  	idField := strings.Fields(strings.TrimPrefix(string(buf[:n]), "goroutine "))[0]
   347  	id, err := strconv.Atoi(idField)
   348  	if err != nil {
   349  		id = -1
   350  	}
   351  	return id
   352  }
   353  
// Stop asks the hub goroutine to shut down by closing h.stop and then blocks
// until the goroutine signals completion by closing h.didStop.
// Calling Stop a second time panics, because h.stop would be closed twice.
func (h *Hub) Stop() {
	close(h.stop)
	<-h.didStop
}
   358  
   359  func (h *Hub) Start() {
   360  	var doStart func()
   361  	var doRecoverableStart func()
   362  	var doRecover func()
   363  
   364  	doStart = func() {
   365  
   366  		h.goroutineId = getGoroutineId()
   367  		l4g.Debug("Hub for index %v is starting with goroutine %v", h.connectionIndex, h.goroutineId)
   368  
   369  		for {
   370  			select {
   371  			case webCon := <-h.register:
   372  				h.connections = append(h.connections, webCon)
   373  				atomic.StoreInt64(&h.connectionCount, int64(len(h.connections)))
   374  
   375  			case webCon := <-h.unregister:
   376  				userId := webCon.UserId
   377  
   378  				found := false
   379  				indexToDel := -1
   380  				for i, webConCandidate := range h.connections {
   381  					if webConCandidate == webCon {
   382  						indexToDel = i
   383  						continue
   384  					}
   385  					if userId == webConCandidate.UserId {
   386  						found = true
   387  						if indexToDel != -1 {
   388  							break
   389  						}
   390  					}
   391  				}
   392  
   393  				if indexToDel != -1 {
   394  					// Delete the webcon we are unregistering
   395  					h.connections[indexToDel] = h.connections[len(h.connections)-1]
   396  					h.connections = h.connections[:len(h.connections)-1]
   397  				}
   398  
   399  				if len(userId) == 0 {
   400  					continue
   401  				}
   402  
   403  				if !found {
   404  					h.app.Go(func() {
   405  						h.app.SetStatusOffline(userId, false)
   406  					})
   407  				}
   408  
   409  			case userId := <-h.invalidateUser:
   410  				for _, webCon := range h.connections {
   411  					if webCon.UserId == userId {
   412  						webCon.InvalidateCache()
   413  					}
   414  				}
   415  
   416  			case msg := <-h.broadcast:
   417  				for _, webCon := range h.connections {
   418  					if webCon.ShouldSendEvent(msg) {
   419  						select {
   420  						case webCon.Send <- msg:
   421  						default:
   422  							l4g.Error(fmt.Sprintf("webhub.broadcast: cannot send, closing websocket for userId=%v", webCon.UserId))
   423  							close(webCon.Send)
   424  							for i, webConCandidate := range h.connections {
   425  								if webConCandidate == webCon {
   426  									h.connections[i] = h.connections[len(h.connections)-1]
   427  									h.connections = h.connections[:len(h.connections)-1]
   428  									break
   429  								}
   430  							}
   431  						}
   432  					}
   433  				}
   434  
   435  			case <-h.stop:
   436  				userIds := make(map[string]bool)
   437  
   438  				for _, webCon := range h.connections {
   439  					userIds[webCon.UserId] = true
   440  					webCon.Close()
   441  				}
   442  
   443  				for userId := range userIds {
   444  					h.app.SetStatusOffline(userId, false)
   445  				}
   446  
   447  				h.connections = make([]*WebConn, 0, model.SESSION_CACHE_SIZE)
   448  				h.ExplicitStop = true
   449  				close(h.didStop)
   450  
   451  				return
   452  			}
   453  		}
   454  	}
   455  
   456  	doRecoverableStart = func() {
   457  		defer doRecover()
   458  		doStart()
   459  	}
   460  
   461  	doRecover = func() {
   462  		if !h.ExplicitStop {
   463  			if r := recover(); r != nil {
   464  				l4g.Error(fmt.Sprintf("Recovering from Hub panic. Panic was: %v", r))
   465  			} else {
   466  				l4g.Error("Webhub stopped unexpectedly. Recovering.")
   467  			}
   468  
   469  			l4g.Error(string(debug.Stack()))
   470  
   471  			go doRecoverableStart()
   472  		}
   473  	}
   474  
   475  	go doRecoverableStart()
   476  }