github.com/jiasir/deis@v1.12.2/logger/drain/simple/drain.go (about) 1 package simple 2 3 import ( 4 "fmt" 5 "log" 6 "net" 7 "net/url" 8 "sync" 9 "time" 10 ) 11 12 // For efficiency, we reuse connections for a while (instead of dialing every time). However, 13 // there are two compelling reasons to redial periodically: 14 // 15 // 1. We don't want DNS changes on the remote end of the drain to go unnoticed for too long. 16 // 17 // 2. If the drain is using TCP, the underlying TCP stack can potentially take a very long time 18 // waiting for acks and retrying send for packets that haven't been acked. This creates a 19 // large window where packets can be spewed into the ether (without any warning) before the 20 // problem is detected. By redialing periodically, we create the opportunity for a failed TCP 21 // handshake-- which tells us sooner that something is wrong. 22 // 23 // For efficiency we want the refresh interval to be high. For resiliency, we want it to be low. 24 // One minute has been arbitrarily selected as a sensible balance of these two concerns. 25 const connRefreshInterval = 1 * time.Minute 26 27 // This determines how many failed dial attempts are required before the drain is muted. 28 const maxFailedConns = 5 29 30 // This determines how much time we're willing to spend dialing. 31 const dialTimeout = 10 * time.Second 32 33 // This is how long the drain is muted for after repeated connection failures. 34 const mutePeriod = 5 * time.Minute 35 36 type logDrain struct { 37 proto string 38 uri string 39 conn net.Conn 40 muted bool 41 mutex sync.Mutex 42 } 43 44 // NewDrain returns a pointer to a new instance of a drain.LogDrain 45 func NewDrain(drainURL string) (*logDrain, error) { 46 u, err := url.Parse(drainURL) 47 if err != nil { 48 return nil, err 49 } 50 var proto string 51 if u.Scheme == "udp" || u.Scheme == "syslog" { 52 proto = "udp" 53 } else if u.Scheme == "tcp" { 54 proto = "tcp" 55 } else { 56 return nil, fmt.Errorf("Invalid drain url scheme: %s", u.Scheme) 57 } 58 return &logDrain{proto: proto, uri: u.Host + u.Path}, nil 59 } 60 61 // Send forwards the provided log message to an external destination 62 func (d *logDrain) Send(message string) error { 63 if d.muted { 64 return nil 65 } 66 d.mutex.Lock() 67 defer d.mutex.Unlock() 68 conn, err := d.getConnection(false) 69 if err != nil { 70 return err 71 } 72 _, err = fmt.Fprintln(conn, message) 73 if err != nil { 74 // Try again with a new connection in case the issue was a broken pipe 75 conn, err = d.getConnection(true) 76 if err != nil { 77 return err 78 } 79 _, err = fmt.Fprintln(conn, message) 80 if err != nil { 81 return err 82 } 83 } 84 return nil 85 } 86 87 // getConnection returns a usable connection, often without needing to redial, but still 88 // redialing when advised. 89 func (d *logDrain) getConnection(forceNew bool) (net.Conn, error) { 90 // If we have a connection, it's not old, and we're not focing a new one... 91 if d.conn != nil && !forceNew { 92 // then return the existing connection 93 return d.conn, nil 94 } 95 // If ANY of those conditions weren't met, it's time for a new connection. 96 // If we have an existing one, close it and nil it out, too for good measure. 97 if d.conn != nil { 98 if err := d.conn.Close(); err != nil { 99 log.Println("drain: Error closing connection. Drain may be leaking connections.", err) 100 } 101 d.conn = nil 102 } 103 // Try a few times... 104 var err error 105 for attempt := 1; attempt <= maxFailedConns; attempt++ { 106 d.conn, err = net.DialTimeout(d.proto, d.uri, dialTimeout) 107 if err == nil { 108 // We got our connection... 109 // Make it good for only so long. See comment above on connRefreshInterval. 110 err = d.conn.SetWriteDeadline(time.Now().Add(connRefreshInterval)) 111 if err != nil { 112 return nil, err 113 } 114 // Break out of the loop and return 115 return d.conn, nil 116 } 117 } 118 // Multiple attempts to dial have failed. Whatever the problem is, we shouldn't expect that 119 // it will resolve itself quickly. 120 log.Printf("drain: Experienced %d consecutive failed connection attempts; muting drain for %s.", maxFailedConns, mutePeriod) 121 // Immediately "mute" the drain. This will prevent us from wasting resources repeatedly dialing 122 // and failing while the message queue gets backed up. This will give the network a break and 123 // allow us to empty the queue. 124 d.muted = true 125 // Unmute the drain when the mute interval has elapsed 126 go func() { 127 time.Sleep(mutePeriod) 128 d.muted = false 129 }() 130 // Return the error from the last failed connection attempt 131 return nil, err 132 }