github.com/status-im/status-go@v1.1.0/protocol/messenger_mailserver_cycle.go (about)

     1  package protocol
     2  
     3  import (
     4  	"context"
     5  	"crypto/rand"
     6  	"math"
     7  	"math/big"
     8  	"net"
     9  	"runtime"
    10  	"sort"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/pkg/errors"
    15  	"go.uber.org/zap"
    16  
    17  	"github.com/waku-org/go-waku/waku/v2/utils"
    18  
    19  	"github.com/status-im/status-go/params"
    20  	"github.com/status-im/status-go/protocol/storenodes"
    21  	"github.com/status-im/status-go/services/mailservers"
    22  	"github.com/status-im/status-go/signal"
    23  )
    24  
    25  const defaultBackoff = 10 * time.Second
    26  const graylistBackoff = 3 * time.Minute
    27  const backoffByUserAction = 0
    28  const isAndroidEmulator = runtime.GOOS == "android" && runtime.GOARCH == "amd64"
    29  const findNearestMailServer = !isAndroidEmulator
    30  const overrideDNS = runtime.GOOS == "android" || runtime.GOOS == "ios"
    31  const bootstrapDNS = "8.8.8.8:53"
    32  
    33  type byRTTMsAndCanConnectBefore []SortedMailserver
    34  
    35  func (s byRTTMsAndCanConnectBefore) Len() int {
    36  	return len(s)
    37  }
    38  
    39  func (s byRTTMsAndCanConnectBefore) Swap(i, j int) {
    40  	s[i], s[j] = s[j], s[i]
    41  }
    42  
    43  func (s byRTTMsAndCanConnectBefore) Less(i, j int) bool {
    44  	// Slightly inaccurate as time sensitive sorting, but it does not matter so much
    45  	now := time.Now()
    46  	if s[i].CanConnectAfter.Before(now) && s[j].CanConnectAfter.Before(now) {
    47  		return s[i].RTT < s[j].RTT
    48  	}
    49  	return s[i].CanConnectAfter.Before(s[j].CanConnectAfter)
    50  }
    51  
    52  func (m *Messenger) StartMailserverCycle(mailservers []mailservers.Mailserver) error {
    53  	if m.transport.WakuVersion() != 2 {
    54  		m.logger.Warn("not starting mailserver cycle: requires wakuv2")
    55  		return nil
    56  	}
    57  
    58  	m.mailserverCycle.allMailservers = mailservers
    59  
    60  	if len(mailservers) == 0 {
    61  		m.logger.Warn("not starting mailserver cycle: empty mailservers list")
    62  		return nil
    63  	}
    64  
    65  	for _, storenode := range mailservers {
    66  
    67  		peerInfo, err := storenode.PeerInfo()
    68  		if err != nil {
    69  			return err
    70  		}
    71  
    72  		for _, addr := range utils.EncapsulatePeerID(peerInfo.ID, peerInfo.Addrs...) {
    73  			_, err := m.transport.AddStorePeer(addr)
    74  			if err != nil {
    75  				return err
    76  			}
    77  		}
    78  	}
    79  	go m.verifyStorenodeStatus()
    80  
    81  	m.logger.Debug("starting mailserver cycle",
    82  		zap.Uint("WakuVersion", m.transport.WakuVersion()),
    83  		zap.Any("mailservers", mailservers),
    84  	)
    85  
    86  	return nil
    87  }
    88  
// DisconnectActiveMailserver disconnects the currently active mailserver, if
// any, applying graylistBackoff to it. The mailserverCycle lock is held for
// the duration of the call.
func (m *Messenger) DisconnectActiveMailserver() {
	m.mailserverCycle.Lock()
	defer m.mailserverCycle.Unlock()
	m.disconnectActiveMailserver(graylistBackoff)
}
    94  
    95  func (m *Messenger) disconnectMailserver(backoffDuration time.Duration) error {
    96  	if m.mailserverCycle.activeMailserver == nil {
    97  		m.logger.Info("no active mailserver")
    98  		return nil
    99  	}
   100  	m.logger.Info("disconnecting active mailserver", zap.String("nodeID", m.mailserverCycle.activeMailserver.ID))
   101  	m.mailPeersMutex.Lock()
   102  	pInfo, ok := m.mailserverCycle.peers[m.mailserverCycle.activeMailserver.ID]
   103  	if ok {
   104  		pInfo.status = disconnected
   105  
   106  		pInfo.canConnectAfter = time.Now().Add(backoffDuration)
   107  		m.mailserverCycle.peers[m.mailserverCycle.activeMailserver.ID] = pInfo
   108  	} else {
   109  		m.mailserverCycle.peers[m.mailserverCycle.activeMailserver.ID] = peerStatus{
   110  			status:          disconnected,
   111  			mailserver:      *m.mailserverCycle.activeMailserver,
   112  			canConnectAfter: time.Now().Add(backoffDuration),
   113  		}
   114  	}
   115  	m.mailPeersMutex.Unlock()
   116  
   117  	m.mailserverCycle.activeMailserver = nil
   118  	return nil
   119  }
   120  
   121  func (m *Messenger) disconnectActiveMailserver(backoffDuration time.Duration) {
   122  	err := m.disconnectMailserver(backoffDuration)
   123  	if err != nil {
   124  		m.logger.Error("failed to disconnect mailserver", zap.Error(err))
   125  	}
   126  	signal.SendMailserverChanged(nil)
   127  }
   128  
   129  func (m *Messenger) cycleMailservers() {
   130  	m.logger.Info("Automatically switching mailserver")
   131  
   132  	if m.mailserverCycle.activeMailserver != nil {
   133  		m.disconnectActiveMailserver(graylistBackoff)
   134  	}
   135  
   136  	useMailserver, err := m.settings.CanUseMailservers()
   137  	if err != nil {
   138  		m.logger.Error("failed to get use mailservers", zap.Error(err))
   139  		return
   140  	}
   141  
   142  	if !useMailserver {
   143  		m.logger.Info("Skipping mailserver search due to useMailserver being false")
   144  		return
   145  	}
   146  
   147  	err = m.findNewMailserver()
   148  	if err != nil {
   149  		m.logger.Error("Error getting new mailserver", zap.Error(err))
   150  	}
   151  }
   152  
   153  func poolSize(fleetSize int) int {
   154  	return int(math.Ceil(float64(fleetSize) / 4))
   155  }
   156  
   157  func (m *Messenger) getFleet() (string, error) {
   158  	var fleet string
   159  	dbFleet, err := m.settings.GetFleet()
   160  	if err != nil {
   161  		return "", err
   162  	}
   163  	if dbFleet != "" {
   164  		fleet = dbFleet
   165  	} else if m.config.clusterConfig.Fleet != "" {
   166  		fleet = m.config.clusterConfig.Fleet
   167  	} else {
   168  		fleet = params.FleetStatusProd
   169  	}
   170  	return fleet, nil
   171  }
   172  
   173  func (m *Messenger) allMailservers() ([]mailservers.Mailserver, error) {
   174  	// Get configured fleet
   175  	fleet, err := m.getFleet()
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	// Get default mailservers for given fleet
   181  	allMailservers := mailservers.DefaultMailserversByFleet(fleet)
   182  
   183  	// Add custom configured mailservers
   184  	if m.mailserversDatabase != nil {
   185  		customMailservers, err := m.mailserversDatabase.Mailservers()
   186  		if err != nil {
   187  			return nil, err
   188  		}
   189  
   190  		for _, c := range customMailservers {
   191  			if c.Fleet == fleet {
   192  				allMailservers = append(allMailservers, c)
   193  			}
   194  		}
   195  	}
   196  
   197  	return allMailservers, nil
   198  }
   199  
// SortedMailserver pairs a mailserver with the values that
// byRTTMsAndCanConnectBefore uses to order it.
type SortedMailserver struct {
	// Mailserver is the mailserver being ranked.
	Mailserver      mailservers.Mailserver
	// RTT is the round-trip time; getAvailableMailserversSortedByRTT fills it
	// with the result of pinging the mailserver.
	RTT             time.Duration
	// CanConnectAfter is compared against the current time when sorting.
	// getAvailableMailserversSortedByRTT leaves it at its zero value.
	CanConnectAfter time.Time
}
   205  
   206  func (m *Messenger) getAvailableMailserversSortedByRTT(allMailservers []mailservers.Mailserver) []mailservers.Mailserver {
   207  	// TODO: this can be replaced by peer selector once code is moved to go-waku api
   208  	availableMailservers := make(map[string]time.Duration)
   209  	availableMailserversMutex := sync.Mutex{}
   210  	availableMailserversWg := sync.WaitGroup{}
   211  	for _, mailserver := range allMailservers {
   212  		availableMailserversWg.Add(1)
   213  		go func(mailserver mailservers.Mailserver) {
   214  			defer availableMailserversWg.Done()
   215  
   216  			peerID, err := mailserver.PeerID()
   217  			if err != nil {
   218  				return
   219  			}
   220  
   221  			ctx, cancel := context.WithTimeout(m.ctx, 4*time.Second)
   222  			defer cancel()
   223  
   224  			rtt, err := m.transport.PingPeer(ctx, peerID)
   225  			if err == nil { // pinging mailservers might fail, but we don't care
   226  				availableMailserversMutex.Lock()
   227  				availableMailservers[mailserver.ID] = rtt
   228  				availableMailserversMutex.Unlock()
   229  			}
   230  		}(mailserver)
   231  	}
   232  	availableMailserversWg.Wait()
   233  
   234  	if len(availableMailservers) == 0 {
   235  		m.logger.Warn("No mailservers available") // Do nothing...
   236  		return nil
   237  	}
   238  
   239  	mailserversByID := make(map[string]mailservers.Mailserver)
   240  	for idx := range allMailservers {
   241  		mailserversByID[allMailservers[idx].ID] = allMailservers[idx]
   242  	}
   243  	var sortedMailservers []SortedMailserver
   244  	for mailserverID, rtt := range availableMailservers {
   245  		ms := mailserversByID[mailserverID]
   246  		sortedMailserver := SortedMailserver{
   247  			Mailserver: ms,
   248  			RTT:        rtt,
   249  		}
   250  		m.mailPeersMutex.Lock()
   251  		pInfo, ok := m.mailserverCycle.peers[ms.ID]
   252  		m.mailPeersMutex.Unlock()
   253  		if ok {
   254  			if time.Now().Before(pInfo.canConnectAfter) {
   255  				continue // We can't connect to this node yet
   256  			}
   257  		}
   258  		sortedMailservers = append(sortedMailservers, sortedMailserver)
   259  	}
   260  	sort.Sort(byRTTMsAndCanConnectBefore(sortedMailservers))
   261  
   262  	result := make([]mailservers.Mailserver, len(sortedMailservers))
   263  	for i, s := range sortedMailservers {
   264  		result[i] = s.Mailserver
   265  	}
   266  
   267  	return result
   268  }
   269  
   270  func (m *Messenger) findNewMailserver() error {
   271  	// we have to override DNS manually because of https://github.com/status-im/status-mobile/issues/19581
   272  	if overrideDNS {
   273  		var dialer net.Dialer
   274  		net.DefaultResolver = &net.Resolver{
   275  			PreferGo: false,
   276  			Dial: func(context context.Context, _, _ string) (net.Conn, error) {
   277  				conn, err := dialer.DialContext(context, "udp", bootstrapDNS)
   278  				if err != nil {
   279  					return nil, err
   280  				}
   281  				return conn, nil
   282  			},
   283  		}
   284  	}
   285  
   286  	pinnedMailserver, err := m.getPinnedMailserver()
   287  	if err != nil {
   288  		m.logger.Error("Could not obtain the pinned mailserver", zap.Error(err))
   289  		return err
   290  	}
   291  	if pinnedMailserver != nil {
   292  		return m.connectToMailserver(*pinnedMailserver)
   293  	}
   294  
   295  	m.logger.Info("Finding a new mailserver...")
   296  
   297  	allMailservers := m.mailserverCycle.allMailservers
   298  
   299  	//	TODO: remove this check once sockets are stable on x86_64 emulators
   300  	if findNearestMailServer {
   301  		allMailservers = m.getAvailableMailserversSortedByRTT(allMailservers)
   302  	}
   303  
   304  	// Picks a random mailserver amongs the ones with the lowest latency
   305  	// The pool size is 1/4 of the mailservers were pinged successfully
   306  	pSize := poolSize(len(allMailservers) - 1)
   307  	if pSize <= 0 {
   308  		pSize = len(allMailservers)
   309  		if pSize <= 0 {
   310  			m.logger.Warn("No storenodes available") // Do nothing...
   311  			return nil
   312  		}
   313  	}
   314  
   315  	r, err := rand.Int(rand.Reader, big.NewInt(int64(pSize)))
   316  	if err != nil {
   317  		return err
   318  	}
   319  
   320  	ms := allMailservers[r.Int64()]
   321  	return m.connectToMailserver(ms)
   322  }
   323  
   324  func (m *Messenger) mailserverStatus(mailserverID string) connStatus {
   325  	m.mailPeersMutex.RLock()
   326  	defer m.mailPeersMutex.RUnlock()
   327  	peer, ok := m.mailserverCycle.peers[mailserverID]
   328  	if !ok {
   329  		return disconnected
   330  	}
   331  	return peer.status
   332  }
   333  
   334  func (m *Messenger) connectToMailserver(ms mailservers.Mailserver) error {
   335  
   336  	m.logger.Info("connecting to mailserver", zap.String("mailserverID", ms.ID))
   337  
   338  	m.mailserverCycle.activeMailserver = &ms
   339  	signal.SendMailserverChanged(m.mailserverCycle.activeMailserver)
   340  
   341  	mailserverStatus := m.mailserverStatus(ms.ID)
   342  	if mailserverStatus != connected {
   343  		m.mailPeersMutex.Lock()
   344  		m.mailserverCycle.peers[ms.ID] = peerStatus{
   345  			status:                connected,
   346  			lastConnectionAttempt: time.Now(),
   347  			canConnectAfter:       time.Now().Add(defaultBackoff),
   348  			mailserver:            ms,
   349  		}
   350  		m.mailPeersMutex.Unlock()
   351  
   352  		m.mailserverCycle.activeMailserver.FailedRequests = 0
   353  		peerID, err := m.mailserverCycle.activeMailserver.PeerID()
   354  		if err != nil {
   355  			m.logger.Error("could not decode the peer id of mailserver", zap.Error(err))
   356  			return err
   357  		}
   358  
   359  		m.logger.Info("mailserver available", zap.String("mailserverID", m.mailserverCycle.activeMailserver.ID))
   360  		m.mailserverCycle.availabilitySubscriptions.EmitMailserverAvailable()
   361  		signal.SendMailserverAvailable(m.mailserverCycle.activeMailserver)
   362  
   363  		m.transport.SetStorePeerID(peerID)
   364  
   365  		// Query mailserver
   366  		m.asyncRequestAllHistoricMessages()
   367  	}
   368  	return nil
   369  }
   370  
   371  // getActiveMailserver returns the active mailserver if a communityID is present then it'll return the mailserver
   372  // for that community if it has a mailserver setup otherwise it'll return the global mailserver
   373  func (m *Messenger) getActiveMailserver(communityID ...string) *mailservers.Mailserver {
   374  	if len(communityID) == 0 || communityID[0] == "" {
   375  		return m.mailserverCycle.activeMailserver
   376  	}
   377  	ms, err := m.communityStorenodes.GetStorenodeByCommunityID(communityID[0])
   378  	if err != nil {
   379  		if !errors.Is(err, storenodes.ErrNotFound) {
   380  			m.logger.Error("getting storenode for community, using global", zap.String("communityID", communityID[0]), zap.Error(err))
   381  		}
   382  		// if we don't find a specific mailserver for the community, we just use the regular mailserverCycle's one
   383  		return m.mailserverCycle.activeMailserver
   384  	}
   385  	return &ms
   386  }
   387  
   388  func (m *Messenger) getActiveMailserverID(communityID ...string) string {
   389  	ms := m.getActiveMailserver(communityID...)
   390  	if ms == nil {
   391  		return ""
   392  	}
   393  	return ms.ID
   394  }
   395  
// isMailserverAvailable reports whether the mailserver with the given ID is
// currently recorded as connected.
func (m *Messenger) isMailserverAvailable(mailserverID string) bool {
	return m.mailserverStatus(mailserverID) == connected
}
   399  
   400  func (m *Messenger) penalizeMailserver(id string) {
   401  	m.mailPeersMutex.Lock()
   402  	defer m.mailPeersMutex.Unlock()
   403  	pInfo, ok := m.mailserverCycle.peers[id]
   404  	if !ok {
   405  		pInfo.status = disconnected
   406  	}
   407  
   408  	pInfo.canConnectAfter = time.Now().Add(graylistBackoff)
   409  	m.mailserverCycle.peers[id] = pInfo
   410  }
   411  
   412  func (m *Messenger) asyncRequestAllHistoricMessages() {
   413  	if !m.config.codeControlFlags.AutoRequestHistoricMessages {
   414  		return
   415  	}
   416  
   417  	m.logger.Debug("asyncRequestAllHistoricMessages")
   418  
   419  	go func() {
   420  		_, err := m.RequestAllHistoricMessages(false, true)
   421  		if err != nil {
   422  			m.logger.Error("failed to request historic messages", zap.Error(err))
   423  		}
   424  	}()
   425  }
   426  
   427  func (m *Messenger) verifyStorenodeStatus() {
   428  	ticker := time.NewTicker(1 * time.Second)
   429  	defer ticker.Stop()
   430  
   431  	for {
   432  		select {
   433  		case <-ticker.C:
   434  			err := m.disconnectStorenodeIfRequired()
   435  			if err != nil {
   436  				m.logger.Error("failed to handle mailserver cycle event", zap.Error(err))
   437  				continue
   438  			}
   439  
   440  		case <-m.quit:
   441  			return
   442  		}
   443  	}
   444  }
   445  
   446  func (m *Messenger) getPinnedMailserver() (*mailservers.Mailserver, error) {
   447  	fleet, err := m.getFleet()
   448  	if err != nil {
   449  		return nil, err
   450  	}
   451  
   452  	pinnedMailservers, err := m.settings.GetPinnedMailservers()
   453  	if err != nil {
   454  		return nil, err
   455  	}
   456  
   457  	pinnedMailserver, ok := pinnedMailservers[fleet]
   458  	if !ok {
   459  		return nil, nil
   460  	}
   461  
   462  	fleetMailservers := mailservers.DefaultMailservers()
   463  
   464  	for _, c := range fleetMailservers {
   465  		if c.Fleet == fleet && c.ID == pinnedMailserver {
   466  			return &c, nil
   467  		}
   468  	}
   469  
   470  	if m.mailserversDatabase != nil {
   471  		customMailservers, err := m.mailserversDatabase.Mailservers()
   472  		if err != nil {
   473  			return nil, err
   474  		}
   475  
   476  		for _, c := range customMailservers {
   477  			if c.Fleet == fleet && c.ID == pinnedMailserver {
   478  				return &c, nil
   479  			}
   480  		}
   481  	}
   482  
   483  	return nil, nil
   484  }
   485  
   486  func (m *Messenger) disconnectStorenodeIfRequired() error {
   487  	m.logger.Debug("wakuV2 storenode status verification")
   488  
   489  	if m.mailserverCycle.activeMailserver == nil {
   490  		// No active storenode, find a new one
   491  		m.cycleMailservers()
   492  		return nil
   493  	}
   494  
   495  	// Check whether we want to disconnect the active storenode
   496  	if m.mailserverCycle.activeMailserver.FailedRequests >= mailserverMaxFailedRequests {
   497  		m.penalizeMailserver(m.mailserverCycle.activeMailserver.ID)
   498  		signal.SendMailserverNotWorking()
   499  		m.logger.Info("too many failed requests", zap.String("storenode", m.mailserverCycle.activeMailserver.ID))
   500  		m.mailserverCycle.activeMailserver.FailedRequests = 0
   501  		return m.connectToNewMailserverAndWait()
   502  	}
   503  
   504  	return nil
   505  }
   506  
   507  func (m *Messenger) waitForAvailableStoreNode(timeout time.Duration) bool {
   508  	// Add 1 second to timeout, because the mailserver cycle has 1 second ticker, which doesn't tick on start.
   509  	// This can be improved after merging https://github.com/status-im/status-go/pull/4380.
   510  	// NOTE: https://stackoverflow.com/questions/32705582/how-to-get-time-tick-to-tick-immediately
   511  	timeout += time.Second
   512  
   513  	finish := make(chan struct{})
   514  	cancel := make(chan struct{})
   515  
   516  	wg := sync.WaitGroup{}
   517  	wg.Add(1)
   518  
   519  	go func() {
   520  		defer func() {
   521  			wg.Done()
   522  		}()
   523  		for !m.isMailserverAvailable(m.getActiveMailserverID()) {
   524  			select {
   525  			case <-m.mailserverCycle.availabilitySubscriptions.Subscribe():
   526  			case <-cancel:
   527  				return
   528  			}
   529  		}
   530  	}()
   531  
   532  	go func() {
   533  		defer func() {
   534  			close(finish)
   535  		}()
   536  		wg.Wait()
   537  	}()
   538  
   539  	select {
   540  	case <-finish:
   541  	case <-time.After(timeout):
   542  		close(cancel)
   543  	case <-m.ctx.Done():
   544  		close(cancel)
   545  	}
   546  
   547  	return m.isMailserverAvailable(m.getActiveMailserverID())
   548  }