github.com/status-im/status-go@v1.1.0/protocol/messenger_mailserver_cycle.go (about) 1 package protocol 2 3 import ( 4 "context" 5 "crypto/rand" 6 "math" 7 "math/big" 8 "net" 9 "runtime" 10 "sort" 11 "sync" 12 "time" 13 14 "github.com/pkg/errors" 15 "go.uber.org/zap" 16 17 "github.com/waku-org/go-waku/waku/v2/utils" 18 19 "github.com/status-im/status-go/params" 20 "github.com/status-im/status-go/protocol/storenodes" 21 "github.com/status-im/status-go/services/mailservers" 22 "github.com/status-im/status-go/signal" 23 ) 24 25 const defaultBackoff = 10 * time.Second 26 const graylistBackoff = 3 * time.Minute 27 const backoffByUserAction = 0 28 const isAndroidEmulator = runtime.GOOS == "android" && runtime.GOARCH == "amd64" 29 const findNearestMailServer = !isAndroidEmulator 30 const overrideDNS = runtime.GOOS == "android" || runtime.GOOS == "ios" 31 const bootstrapDNS = "8.8.8.8:53" 32 33 type byRTTMsAndCanConnectBefore []SortedMailserver 34 35 func (s byRTTMsAndCanConnectBefore) Len() int { 36 return len(s) 37 } 38 39 func (s byRTTMsAndCanConnectBefore) Swap(i, j int) { 40 s[i], s[j] = s[j], s[i] 41 } 42 43 func (s byRTTMsAndCanConnectBefore) Less(i, j int) bool { 44 // Slightly inaccurate as time sensitive sorting, but it does not matter so much 45 now := time.Now() 46 if s[i].CanConnectAfter.Before(now) && s[j].CanConnectAfter.Before(now) { 47 return s[i].RTT < s[j].RTT 48 } 49 return s[i].CanConnectAfter.Before(s[j].CanConnectAfter) 50 } 51 52 func (m *Messenger) StartMailserverCycle(mailservers []mailservers.Mailserver) error { 53 if m.transport.WakuVersion() != 2 { 54 m.logger.Warn("not starting mailserver cycle: requires wakuv2") 55 return nil 56 } 57 58 m.mailserverCycle.allMailservers = mailservers 59 60 if len(mailservers) == 0 { 61 m.logger.Warn("not starting mailserver cycle: empty mailservers list") 62 return nil 63 } 64 65 for _, storenode := range mailservers { 66 67 peerInfo, err := storenode.PeerInfo() 68 if err != nil { 69 return err 70 } 71 72 for _, addr := range utils.EncapsulatePeerID(peerInfo.ID, peerInfo.Addrs...) { 73 _, err := m.transport.AddStorePeer(addr) 74 if err != nil { 75 return err 76 } 77 } 78 } 79 go m.verifyStorenodeStatus() 80 81 m.logger.Debug("starting mailserver cycle", 82 zap.Uint("WakuVersion", m.transport.WakuVersion()), 83 zap.Any("mailservers", mailservers), 84 ) 85 86 return nil 87 } 88 89 func (m *Messenger) DisconnectActiveMailserver() { 90 m.mailserverCycle.Lock() 91 defer m.mailserverCycle.Unlock() 92 m.disconnectActiveMailserver(graylistBackoff) 93 } 94 95 func (m *Messenger) disconnectMailserver(backoffDuration time.Duration) error { 96 if m.mailserverCycle.activeMailserver == nil { 97 m.logger.Info("no active mailserver") 98 return nil 99 } 100 m.logger.Info("disconnecting active mailserver", zap.String("nodeID", m.mailserverCycle.activeMailserver.ID)) 101 m.mailPeersMutex.Lock() 102 pInfo, ok := m.mailserverCycle.peers[m.mailserverCycle.activeMailserver.ID] 103 if ok { 104 pInfo.status = disconnected 105 106 pInfo.canConnectAfter = time.Now().Add(backoffDuration) 107 m.mailserverCycle.peers[m.mailserverCycle.activeMailserver.ID] = pInfo 108 } else { 109 m.mailserverCycle.peers[m.mailserverCycle.activeMailserver.ID] = peerStatus{ 110 status: disconnected, 111 mailserver: *m.mailserverCycle.activeMailserver, 112 canConnectAfter: time.Now().Add(backoffDuration), 113 } 114 } 115 m.mailPeersMutex.Unlock() 116 117 m.mailserverCycle.activeMailserver = nil 118 return nil 119 } 120 121 func (m *Messenger) disconnectActiveMailserver(backoffDuration time.Duration) { 122 err := m.disconnectMailserver(backoffDuration) 123 if err != nil { 124 m.logger.Error("failed to disconnect mailserver", zap.Error(err)) 125 } 126 signal.SendMailserverChanged(nil) 127 } 128 129 func (m *Messenger) cycleMailservers() { 130 m.logger.Info("Automatically switching mailserver") 131 132 if m.mailserverCycle.activeMailserver != nil { 133 m.disconnectActiveMailserver(graylistBackoff) 134 } 135 136 useMailserver, err := m.settings.CanUseMailservers() 137 if err != nil { 138 m.logger.Error("failed to get use mailservers", zap.Error(err)) 139 return 140 } 141 142 if !useMailserver { 143 m.logger.Info("Skipping mailserver search due to useMailserver being false") 144 return 145 } 146 147 err = m.findNewMailserver() 148 if err != nil { 149 m.logger.Error("Error getting new mailserver", zap.Error(err)) 150 } 151 } 152 153 func poolSize(fleetSize int) int { 154 return int(math.Ceil(float64(fleetSize) / 4)) 155 } 156 157 func (m *Messenger) getFleet() (string, error) { 158 var fleet string 159 dbFleet, err := m.settings.GetFleet() 160 if err != nil { 161 return "", err 162 } 163 if dbFleet != "" { 164 fleet = dbFleet 165 } else if m.config.clusterConfig.Fleet != "" { 166 fleet = m.config.clusterConfig.Fleet 167 } else { 168 fleet = params.FleetStatusProd 169 } 170 return fleet, nil 171 } 172 173 func (m *Messenger) allMailservers() ([]mailservers.Mailserver, error) { 174 // Get configured fleet 175 fleet, err := m.getFleet() 176 if err != nil { 177 return nil, err 178 } 179 180 // Get default mailservers for given fleet 181 allMailservers := mailservers.DefaultMailserversByFleet(fleet) 182 183 // Add custom configured mailservers 184 if m.mailserversDatabase != nil { 185 customMailservers, err := m.mailserversDatabase.Mailservers() 186 if err != nil { 187 return nil, err 188 } 189 190 for _, c := range customMailservers { 191 if c.Fleet == fleet { 192 allMailservers = append(allMailservers, c) 193 } 194 } 195 } 196 197 return allMailservers, nil 198 } 199 200 type SortedMailserver struct { 201 Mailserver mailservers.Mailserver 202 RTT time.Duration 203 CanConnectAfter time.Time 204 } 205 206 func (m *Messenger) getAvailableMailserversSortedByRTT(allMailservers []mailservers.Mailserver) []mailservers.Mailserver { 207 // TODO: this can be replaced by peer selector once code is moved to go-waku api 208 availableMailservers := make(map[string]time.Duration) 209 availableMailserversMutex := sync.Mutex{} 210 availableMailserversWg := sync.WaitGroup{} 211 for _, mailserver := range allMailservers { 212 availableMailserversWg.Add(1) 213 go func(mailserver mailservers.Mailserver) { 214 defer availableMailserversWg.Done() 215 216 peerID, err := mailserver.PeerID() 217 if err != nil { 218 return 219 } 220 221 ctx, cancel := context.WithTimeout(m.ctx, 4*time.Second) 222 defer cancel() 223 224 rtt, err := m.transport.PingPeer(ctx, peerID) 225 if err == nil { // pinging mailservers might fail, but we don't care 226 availableMailserversMutex.Lock() 227 availableMailservers[mailserver.ID] = rtt 228 availableMailserversMutex.Unlock() 229 } 230 }(mailserver) 231 } 232 availableMailserversWg.Wait() 233 234 if len(availableMailservers) == 0 { 235 m.logger.Warn("No mailservers available") // Do nothing... 236 return nil 237 } 238 239 mailserversByID := make(map[string]mailservers.Mailserver) 240 for idx := range allMailservers { 241 mailserversByID[allMailservers[idx].ID] = allMailservers[idx] 242 } 243 var sortedMailservers []SortedMailserver 244 for mailserverID, rtt := range availableMailservers { 245 ms := mailserversByID[mailserverID] 246 sortedMailserver := SortedMailserver{ 247 Mailserver: ms, 248 RTT: rtt, 249 } 250 m.mailPeersMutex.Lock() 251 pInfo, ok := m.mailserverCycle.peers[ms.ID] 252 m.mailPeersMutex.Unlock() 253 if ok { 254 if time.Now().Before(pInfo.canConnectAfter) { 255 continue // We can't connect to this node yet 256 } 257 } 258 sortedMailservers = append(sortedMailservers, sortedMailserver) 259 } 260 sort.Sort(byRTTMsAndCanConnectBefore(sortedMailservers)) 261 262 result := make([]mailservers.Mailserver, len(sortedMailservers)) 263 for i, s := range sortedMailservers { 264 result[i] = s.Mailserver 265 } 266 267 return result 268 } 269 270 func (m *Messenger) findNewMailserver() error { 271 // we have to override DNS manually because of https://github.com/status-im/status-mobile/issues/19581 272 if overrideDNS { 273 var dialer net.Dialer 274 net.DefaultResolver = &net.Resolver{ 275 PreferGo: false, 276 Dial: func(context context.Context, _, _ string) (net.Conn, error) { 277 conn, err := dialer.DialContext(context, "udp", bootstrapDNS) 278 if err != nil { 279 return nil, err 280 } 281 return conn, nil 282 }, 283 } 284 } 285 286 pinnedMailserver, err := m.getPinnedMailserver() 287 if err != nil { 288 m.logger.Error("Could not obtain the pinned mailserver", zap.Error(err)) 289 return err 290 } 291 if pinnedMailserver != nil { 292 return m.connectToMailserver(*pinnedMailserver) 293 } 294 295 m.logger.Info("Finding a new mailserver...") 296 297 allMailservers := m.mailserverCycle.allMailservers 298 299 // TODO: remove this check once sockets are stable on x86_64 emulators 300 if findNearestMailServer { 301 allMailservers = m.getAvailableMailserversSortedByRTT(allMailservers) 302 } 303 304 // Picks a random mailserver amongs the ones with the lowest latency 305 // The pool size is 1/4 of the mailservers were pinged successfully 306 pSize := poolSize(len(allMailservers) - 1) 307 if pSize <= 0 { 308 pSize = len(allMailservers) 309 if pSize <= 0 { 310 m.logger.Warn("No storenodes available") // Do nothing... 311 return nil 312 } 313 } 314 315 r, err := rand.Int(rand.Reader, big.NewInt(int64(pSize))) 316 if err != nil { 317 return err 318 } 319 320 ms := allMailservers[r.Int64()] 321 return m.connectToMailserver(ms) 322 } 323 324 func (m *Messenger) mailserverStatus(mailserverID string) connStatus { 325 m.mailPeersMutex.RLock() 326 defer m.mailPeersMutex.RUnlock() 327 peer, ok := m.mailserverCycle.peers[mailserverID] 328 if !ok { 329 return disconnected 330 } 331 return peer.status 332 } 333 334 func (m *Messenger) connectToMailserver(ms mailservers.Mailserver) error { 335 336 m.logger.Info("connecting to mailserver", zap.String("mailserverID", ms.ID)) 337 338 m.mailserverCycle.activeMailserver = &ms 339 signal.SendMailserverChanged(m.mailserverCycle.activeMailserver) 340 341 mailserverStatus := m.mailserverStatus(ms.ID) 342 if mailserverStatus != connected { 343 m.mailPeersMutex.Lock() 344 m.mailserverCycle.peers[ms.ID] = peerStatus{ 345 status: connected, 346 lastConnectionAttempt: time.Now(), 347 canConnectAfter: time.Now().Add(defaultBackoff), 348 mailserver: ms, 349 } 350 m.mailPeersMutex.Unlock() 351 352 m.mailserverCycle.activeMailserver.FailedRequests = 0 353 peerID, err := m.mailserverCycle.activeMailserver.PeerID() 354 if err != nil { 355 m.logger.Error("could not decode the peer id of mailserver", zap.Error(err)) 356 return err 357 } 358 359 m.logger.Info("mailserver available", zap.String("mailserverID", m.mailserverCycle.activeMailserver.ID)) 360 m.mailserverCycle.availabilitySubscriptions.EmitMailserverAvailable() 361 signal.SendMailserverAvailable(m.mailserverCycle.activeMailserver) 362 363 m.transport.SetStorePeerID(peerID) 364 365 // Query mailserver 366 m.asyncRequestAllHistoricMessages() 367 } 368 return nil 369 } 370 371 // getActiveMailserver returns the active mailserver if a communityID is present then it'll return the mailserver 372 // for that community if it has a mailserver setup otherwise it'll return the global mailserver 373 func (m *Messenger) getActiveMailserver(communityID ...string) *mailservers.Mailserver { 374 if len(communityID) == 0 || communityID[0] == "" { 375 return m.mailserverCycle.activeMailserver 376 } 377 ms, err := m.communityStorenodes.GetStorenodeByCommunityID(communityID[0]) 378 if err != nil { 379 if !errors.Is(err, storenodes.ErrNotFound) { 380 m.logger.Error("getting storenode for community, using global", zap.String("communityID", communityID[0]), zap.Error(err)) 381 } 382 // if we don't find a specific mailserver for the community, we just use the regular mailserverCycle's one 383 return m.mailserverCycle.activeMailserver 384 } 385 return &ms 386 } 387 388 func (m *Messenger) getActiveMailserverID(communityID ...string) string { 389 ms := m.getActiveMailserver(communityID...) 390 if ms == nil { 391 return "" 392 } 393 return ms.ID 394 } 395 396 func (m *Messenger) isMailserverAvailable(mailserverID string) bool { 397 return m.mailserverStatus(mailserverID) == connected 398 } 399 400 func (m *Messenger) penalizeMailserver(id string) { 401 m.mailPeersMutex.Lock() 402 defer m.mailPeersMutex.Unlock() 403 pInfo, ok := m.mailserverCycle.peers[id] 404 if !ok { 405 pInfo.status = disconnected 406 } 407 408 pInfo.canConnectAfter = time.Now().Add(graylistBackoff) 409 m.mailserverCycle.peers[id] = pInfo 410 } 411 412 func (m *Messenger) asyncRequestAllHistoricMessages() { 413 if !m.config.codeControlFlags.AutoRequestHistoricMessages { 414 return 415 } 416 417 m.logger.Debug("asyncRequestAllHistoricMessages") 418 419 go func() { 420 _, err := m.RequestAllHistoricMessages(false, true) 421 if err != nil { 422 m.logger.Error("failed to request historic messages", zap.Error(err)) 423 } 424 }() 425 } 426 427 func (m *Messenger) verifyStorenodeStatus() { 428 ticker := time.NewTicker(1 * time.Second) 429 defer ticker.Stop() 430 431 for { 432 select { 433 case <-ticker.C: 434 err := m.disconnectStorenodeIfRequired() 435 if err != nil { 436 m.logger.Error("failed to handle mailserver cycle event", zap.Error(err)) 437 continue 438 } 439 440 case <-m.quit: 441 return 442 } 443 } 444 } 445 446 func (m *Messenger) getPinnedMailserver() (*mailservers.Mailserver, error) { 447 fleet, err := m.getFleet() 448 if err != nil { 449 return nil, err 450 } 451 452 pinnedMailservers, err := m.settings.GetPinnedMailservers() 453 if err != nil { 454 return nil, err 455 } 456 457 pinnedMailserver, ok := pinnedMailservers[fleet] 458 if !ok { 459 return nil, nil 460 } 461 462 fleetMailservers := mailservers.DefaultMailservers() 463 464 for _, c := range fleetMailservers { 465 if c.Fleet == fleet && c.ID == pinnedMailserver { 466 return &c, nil 467 } 468 } 469 470 if m.mailserversDatabase != nil { 471 customMailservers, err := m.mailserversDatabase.Mailservers() 472 if err != nil { 473 return nil, err 474 } 475 476 for _, c := range customMailservers { 477 if c.Fleet == fleet && c.ID == pinnedMailserver { 478 return &c, nil 479 } 480 } 481 } 482 483 return nil, nil 484 } 485 486 func (m *Messenger) disconnectStorenodeIfRequired() error { 487 m.logger.Debug("wakuV2 storenode status verification") 488 489 if m.mailserverCycle.activeMailserver == nil { 490 // No active storenode, find a new one 491 m.cycleMailservers() 492 return nil 493 } 494 495 // Check whether we want to disconnect the active storenode 496 if m.mailserverCycle.activeMailserver.FailedRequests >= mailserverMaxFailedRequests { 497 m.penalizeMailserver(m.mailserverCycle.activeMailserver.ID) 498 signal.SendMailserverNotWorking() 499 m.logger.Info("too many failed requests", zap.String("storenode", m.mailserverCycle.activeMailserver.ID)) 500 m.mailserverCycle.activeMailserver.FailedRequests = 0 501 return m.connectToNewMailserverAndWait() 502 } 503 504 return nil 505 } 506 507 func (m *Messenger) waitForAvailableStoreNode(timeout time.Duration) bool { 508 // Add 1 second to timeout, because the mailserver cycle has 1 second ticker, which doesn't tick on start. 509 // This can be improved after merging https://github.com/status-im/status-go/pull/4380. 510 // NOTE: https://stackoverflow.com/questions/32705582/how-to-get-time-tick-to-tick-immediately 511 timeout += time.Second 512 513 finish := make(chan struct{}) 514 cancel := make(chan struct{}) 515 516 wg := sync.WaitGroup{} 517 wg.Add(1) 518 519 go func() { 520 defer func() { 521 wg.Done() 522 }() 523 for !m.isMailserverAvailable(m.getActiveMailserverID()) { 524 select { 525 case <-m.mailserverCycle.availabilitySubscriptions.Subscribe(): 526 case <-cancel: 527 return 528 } 529 } 530 }() 531 532 go func() { 533 defer func() { 534 close(finish) 535 }() 536 wg.Wait() 537 }() 538 539 select { 540 case <-finish: 541 case <-time.After(timeout): 542 close(cancel) 543 case <-m.ctx.Done(): 544 close(cancel) 545 } 546 547 return m.isMailserverAvailable(m.getActiveMailserverID()) 548 }