github.com/keltia/go-ipfs@v0.3.8-0.20150909044612-210793031c63/diagnostics/diag.go (about) 1 // package diagnostics implements a network diagnostics service that 2 // allows a request to traverse the network and gather information 3 // on every node connected to it. 4 package diagnostics 5 6 import ( 7 "crypto/rand" 8 "encoding/json" 9 "errors" 10 "fmt" 11 "sync" 12 "time" 13 14 ggio "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/gogo/protobuf/io" 15 proto "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/gogo/protobuf/proto" 16 ctxio "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-context/io" 17 context "github.com/ipfs/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context" 18 pb "github.com/ipfs/go-ipfs/diagnostics/pb" 19 host "github.com/ipfs/go-ipfs/p2p/host" 20 inet "github.com/ipfs/go-ipfs/p2p/net" 21 peer "github.com/ipfs/go-ipfs/p2p/peer" 22 protocol "github.com/ipfs/go-ipfs/p2p/protocol" 23 util "github.com/ipfs/go-ipfs/util" 24 ) 25 26 var log = util.Logger("diagnostics") 27 28 // ProtocolDiag is the diagnostics protocol.ID 29 var ProtocolDiag protocol.ID = "/ipfs/diagnostics" 30 31 var ErrAlreadyRunning = errors.New("diagnostic with that ID already running") 32 33 const ResponseTimeout = time.Second * 10 34 const HopTimeoutDecrement = time.Second * 2 35 36 // Diagnostics is a net service that manages requesting and responding to diagnostic 37 // requests 38 type Diagnostics struct { 39 host host.Host 40 self peer.ID 41 42 diagLock sync.Mutex 43 diagMap map[string]time.Time 44 birth time.Time 45 } 46 47 // NewDiagnostics instantiates a new diagnostics service running on the given network 48 func NewDiagnostics(self peer.ID, h host.Host) *Diagnostics { 49 d := &Diagnostics{ 50 host: h, 51 self: self, 52 birth: time.Now(), 53 diagMap: make(map[string]time.Time), 54 } 55 56 h.SetStreamHandler(ProtocolDiag, d.handleNewStream) 57 return d 58 } 59 60 type connDiagInfo struct { 61 Latency time.Duration 62 ID string 63 Count int 64 } 65 66 type DiagInfo struct { 67 // This nodes ID 68 ID string 69 70 // A list of peers this node currently has open connections to 71 Connections []connDiagInfo 72 73 // A list of keys provided by this node 74 // (currently not filled) 75 Keys []string 76 77 // How long this node has been running for 78 // TODO rename Uptime 79 LifeSpan time.Duration 80 81 // Incoming Bandwidth Usage 82 BwIn uint64 83 84 // Outgoing Bandwidth Usage 85 BwOut uint64 86 87 // Information about the version of code this node is running 88 CodeVersion string 89 } 90 91 // Marshal to json 92 func (di *DiagInfo) Marshal() []byte { 93 b, err := json.Marshal(di) 94 if err != nil { 95 panic(err) 96 } 97 //TODO: also consider compressing this. There will be a lot of these 98 return b 99 } 100 101 func (d *Diagnostics) getPeers() map[peer.ID]int { 102 counts := make(map[peer.ID]int) 103 for _, p := range d.host.Network().Peers() { 104 counts[p]++ 105 } 106 107 return counts 108 } 109 110 func (d *Diagnostics) getDiagInfo() *DiagInfo { 111 di := new(DiagInfo) 112 di.CodeVersion = "github.com/ipfs/go-ipfs" 113 di.ID = d.self.Pretty() 114 di.LifeSpan = time.Since(d.birth) 115 di.Keys = nil // Currently no way to query datastore 116 117 // di.BwIn, di.BwOut = d.host.BandwidthTotals() //TODO fix this. 118 119 for p, n := range d.getPeers() { 120 d := connDiagInfo{ 121 Latency: d.host.Peerstore().LatencyEWMA(p), 122 ID: p.Pretty(), 123 Count: n, 124 } 125 di.Connections = append(di.Connections, d) 126 } 127 return di 128 } 129 130 func newID() string { 131 id := make([]byte, 16) 132 rand.Read(id) 133 return string(id) 134 } 135 136 // GetDiagnostic runs a diagnostics request across the entire network 137 func (d *Diagnostics) GetDiagnostic(ctx context.Context, timeout time.Duration) ([]*DiagInfo, error) { 138 log.Debug("Getting diagnostic.") 139 ctx, cancel := context.WithTimeout(ctx, timeout) 140 defer cancel() 141 142 diagID := newID() 143 d.diagLock.Lock() 144 d.diagMap[diagID] = time.Now() 145 d.diagLock.Unlock() 146 147 log.Debug("Begin Diagnostic") 148 149 peers := d.getPeers() 150 log.Debugf("Sending diagnostic request to %d peers.", len(peers)) 151 152 pmes := newMessage(diagID) 153 154 pmes.SetTimeoutDuration(timeout - HopTimeoutDecrement) // decrease timeout per hop 155 dpeers, err := d.getDiagnosticFromPeers(ctx, d.getPeers(), pmes) 156 if err != nil { 157 return nil, fmt.Errorf("diagnostic from peers err: %s", err) 158 } 159 160 di := d.getDiagInfo() 161 out := []*DiagInfo{di} 162 for dpi := range dpeers { 163 out = append(out, dpi) 164 } 165 return out, nil 166 } 167 168 func decodeDiagJson(data []byte) (*DiagInfo, error) { 169 di := new(DiagInfo) 170 err := json.Unmarshal(data, di) 171 if err != nil { 172 return nil, err 173 } 174 175 return di, nil 176 } 177 178 func (d *Diagnostics) getDiagnosticFromPeers(ctx context.Context, peers map[peer.ID]int, pmes *pb.Message) (<-chan *DiagInfo, error) { 179 respdata := make(chan *DiagInfo) 180 wg := sync.WaitGroup{} 181 for p := range peers { 182 wg.Add(1) 183 log.Debugf("Sending diagnostic request to peer: %s", p) 184 go func(p peer.ID) { 185 defer wg.Done() 186 out, err := d.getDiagnosticFromPeer(ctx, p, pmes) 187 if err != nil { 188 log.Debugf("Error getting diagnostic from %s: %s", p, err) 189 return 190 } 191 for d := range out { 192 respdata <- d 193 } 194 }(p) 195 } 196 197 go func() { 198 wg.Wait() 199 close(respdata) 200 }() 201 202 return respdata, nil 203 } 204 205 func (d *Diagnostics) getDiagnosticFromPeer(ctx context.Context, p peer.ID, pmes *pb.Message) (<-chan *DiagInfo, error) { 206 s, err := d.host.NewStream(ProtocolDiag, p) 207 if err != nil { 208 return nil, err 209 } 210 211 cr := ctxio.NewReader(ctx, s) // ok to use. we defer close stream in this func 212 cw := ctxio.NewWriter(ctx, s) // ok to use. we defer close stream in this func 213 r := ggio.NewDelimitedReader(cr, inet.MessageSizeMax) 214 w := ggio.NewDelimitedWriter(cw) 215 216 start := time.Now() 217 218 if err := w.WriteMsg(pmes); err != nil { 219 return nil, err 220 } 221 222 out := make(chan *DiagInfo) 223 go func() { 224 225 defer func() { 226 close(out) 227 s.Close() 228 rtt := time.Since(start) 229 log.Infof("diagnostic request took: %s", rtt.String()) 230 }() 231 232 for { 233 rpmes := new(pb.Message) 234 if err := r.ReadMsg(rpmes); err != nil { 235 log.Debugf("Error reading diagnostic from stream: %s", err) 236 return 237 } 238 if rpmes == nil { 239 log.Debug("Got no response back from diag request.") 240 return 241 } 242 243 di, err := decodeDiagJson(rpmes.GetData()) 244 if err != nil { 245 log.Debug(err) 246 return 247 } 248 249 select { 250 case out <- di: 251 case <-ctx.Done(): 252 return 253 } 254 } 255 256 }() 257 258 return out, nil 259 } 260 261 func newMessage(diagID string) *pb.Message { 262 pmes := new(pb.Message) 263 pmes.DiagID = proto.String(diagID) 264 return pmes 265 } 266 267 func (d *Diagnostics) HandleMessage(ctx context.Context, s inet.Stream) error { 268 269 cr := ctxio.NewReader(ctx, s) 270 cw := ctxio.NewWriter(ctx, s) 271 r := ggio.NewDelimitedReader(cr, inet.MessageSizeMax) // maxsize 272 w := ggio.NewDelimitedWriter(cw) 273 274 // deserialize msg 275 pmes := new(pb.Message) 276 if err := r.ReadMsg(pmes); err != nil { 277 log.Debugf("Failed to decode protobuf message: %v", err) 278 return nil 279 } 280 281 // Print out diagnostic 282 log.Infof("[peer: %s] Got message from [%s]\n", 283 d.self.Pretty(), s.Conn().RemotePeer()) 284 285 // Make sure we havent already handled this request to prevent loops 286 if err := d.startDiag(pmes.GetDiagID()); err != nil { 287 return nil 288 } 289 290 resp := newMessage(pmes.GetDiagID()) 291 resp.Data = d.getDiagInfo().Marshal() 292 if err := w.WriteMsg(resp); err != nil { 293 log.Debugf("Failed to write protobuf message over stream: %s", err) 294 return err 295 } 296 297 timeout := pmes.GetTimeoutDuration() 298 if timeout < HopTimeoutDecrement { 299 return fmt.Errorf("timeout too short: %s", timeout) 300 } 301 ctx, cancel := context.WithTimeout(ctx, timeout) 302 defer cancel() 303 pmes.SetTimeoutDuration(timeout - HopTimeoutDecrement) 304 305 dpeers, err := d.getDiagnosticFromPeers(ctx, d.getPeers(), pmes) 306 if err != nil { 307 log.Debugf("diagnostic from peers err: %s", err) 308 return err 309 } 310 for b := range dpeers { 311 resp := newMessage(pmes.GetDiagID()) 312 resp.Data = b.Marshal() 313 if err := w.WriteMsg(resp); err != nil { 314 log.Debugf("Failed to write protobuf message over stream: %s", err) 315 return err 316 } 317 } 318 319 return nil 320 } 321 322 func (d *Diagnostics) startDiag(id string) error { 323 d.diagLock.Lock() 324 _, found := d.diagMap[id] 325 if found { 326 d.diagLock.Unlock() 327 return ErrAlreadyRunning 328 } 329 d.diagMap[id] = time.Now() 330 d.diagLock.Unlock() 331 return nil 332 } 333 334 func (d *Diagnostics) handleNewStream(s inet.Stream) { 335 d.HandleMessage(context.Background(), s) 336 s.Close() 337 }