github.com/keltia/go-ipfs@v0.3.8-0.20150909044612-210793031c63/diagnostics/diag.go (about)

     1  // Package diagnostics implements a network diagnostics service that
     2  // allows a request to traverse the network and gather information
     3  // on every node connected to it.
     4  package diagnostics
     5  
     6  import (
     7  	"crypto/rand"
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  	"sync"
    12  	"time"
    13  
    14  	ggio "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/gogo/protobuf/io"
    15  	proto "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/gogo/protobuf/proto"
    16  	ctxio "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-context/io"
    17  	context "github.com/ipfs/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context"
    18  	pb "github.com/ipfs/go-ipfs/diagnostics/pb"
    19  	host "github.com/ipfs/go-ipfs/p2p/host"
    20  	inet "github.com/ipfs/go-ipfs/p2p/net"
    21  	peer "github.com/ipfs/go-ipfs/p2p/peer"
    22  	protocol "github.com/ipfs/go-ipfs/p2p/protocol"
    23  	util "github.com/ipfs/go-ipfs/util"
    24  )
    25  
    26  var log = util.Logger("diagnostics")
    27  
    28  // ProtocolDiag is the diagnostics protocol.ID
    29  var ProtocolDiag protocol.ID = "/ipfs/diagnostics"
    30  
    31  var ErrAlreadyRunning = errors.New("diagnostic with that ID already running")
    32  
    33  const ResponseTimeout = time.Second * 10
    34  const HopTimeoutDecrement = time.Second * 2
    35  
    36  // Diagnostics is a net service that manages requesting and responding to diagnostic
    37  // requests
    38  type Diagnostics struct {
    39  	host host.Host
    40  	self peer.ID
    41  
    42  	diagLock sync.Mutex
    43  	diagMap  map[string]time.Time
    44  	birth    time.Time
    45  }
    46  
    47  // NewDiagnostics instantiates a new diagnostics service running on the given network
    48  func NewDiagnostics(self peer.ID, h host.Host) *Diagnostics {
    49  	d := &Diagnostics{
    50  		host:    h,
    51  		self:    self,
    52  		birth:   time.Now(),
    53  		diagMap: make(map[string]time.Time),
    54  	}
    55  
    56  	h.SetStreamHandler(ProtocolDiag, d.handleNewStream)
    57  	return d
    58  }
    59  
    60  type connDiagInfo struct {
    61  	Latency time.Duration
    62  	ID      string
    63  	Count   int
    64  }
    65  
    66  type DiagInfo struct {
    67  	// This nodes ID
    68  	ID string
    69  
    70  	// A list of peers this node currently has open connections to
    71  	Connections []connDiagInfo
    72  
    73  	// A list of keys provided by this node
    74  	//    (currently not filled)
    75  	Keys []string
    76  
    77  	// How long this node has been running for
    78  	// TODO rename Uptime
    79  	LifeSpan time.Duration
    80  
    81  	// Incoming Bandwidth Usage
    82  	BwIn uint64
    83  
    84  	// Outgoing Bandwidth Usage
    85  	BwOut uint64
    86  
    87  	// Information about the version of code this node is running
    88  	CodeVersion string
    89  }
    90  
    91  // Marshal to json
    92  func (di *DiagInfo) Marshal() []byte {
    93  	b, err := json.Marshal(di)
    94  	if err != nil {
    95  		panic(err)
    96  	}
    97  	//TODO: also consider compressing this. There will be a lot of these
    98  	return b
    99  }
   100  
   101  func (d *Diagnostics) getPeers() map[peer.ID]int {
   102  	counts := make(map[peer.ID]int)
   103  	for _, p := range d.host.Network().Peers() {
   104  		counts[p]++
   105  	}
   106  
   107  	return counts
   108  }
   109  
   110  func (d *Diagnostics) getDiagInfo() *DiagInfo {
   111  	di := new(DiagInfo)
   112  	di.CodeVersion = "github.com/ipfs/go-ipfs"
   113  	di.ID = d.self.Pretty()
   114  	di.LifeSpan = time.Since(d.birth)
   115  	di.Keys = nil // Currently no way to query datastore
   116  
   117  	// di.BwIn, di.BwOut = d.host.BandwidthTotals() //TODO fix this.
   118  
   119  	for p, n := range d.getPeers() {
   120  		d := connDiagInfo{
   121  			Latency: d.host.Peerstore().LatencyEWMA(p),
   122  			ID:      p.Pretty(),
   123  			Count:   n,
   124  		}
   125  		di.Connections = append(di.Connections, d)
   126  	}
   127  	return di
   128  }
   129  
   130  func newID() string {
   131  	id := make([]byte, 16)
   132  	rand.Read(id)
   133  	return string(id)
   134  }
   135  
   136  // GetDiagnostic runs a diagnostics request across the entire network
   137  func (d *Diagnostics) GetDiagnostic(ctx context.Context, timeout time.Duration) ([]*DiagInfo, error) {
   138  	log.Debug("Getting diagnostic.")
   139  	ctx, cancel := context.WithTimeout(ctx, timeout)
   140  	defer cancel()
   141  
   142  	diagID := newID()
   143  	d.diagLock.Lock()
   144  	d.diagMap[diagID] = time.Now()
   145  	d.diagLock.Unlock()
   146  
   147  	log.Debug("Begin Diagnostic")
   148  
   149  	peers := d.getPeers()
   150  	log.Debugf("Sending diagnostic request to %d peers.", len(peers))
   151  
   152  	pmes := newMessage(diagID)
   153  
   154  	pmes.SetTimeoutDuration(timeout - HopTimeoutDecrement) // decrease timeout per hop
   155  	dpeers, err := d.getDiagnosticFromPeers(ctx, d.getPeers(), pmes)
   156  	if err != nil {
   157  		return nil, fmt.Errorf("diagnostic from peers err: %s", err)
   158  	}
   159  
   160  	di := d.getDiagInfo()
   161  	out := []*DiagInfo{di}
   162  	for dpi := range dpeers {
   163  		out = append(out, dpi)
   164  	}
   165  	return out, nil
   166  }
   167  
   168  func decodeDiagJson(data []byte) (*DiagInfo, error) {
   169  	di := new(DiagInfo)
   170  	err := json.Unmarshal(data, di)
   171  	if err != nil {
   172  		return nil, err
   173  	}
   174  
   175  	return di, nil
   176  }
   177  
   178  func (d *Diagnostics) getDiagnosticFromPeers(ctx context.Context, peers map[peer.ID]int, pmes *pb.Message) (<-chan *DiagInfo, error) {
   179  	respdata := make(chan *DiagInfo)
   180  	wg := sync.WaitGroup{}
   181  	for p := range peers {
   182  		wg.Add(1)
   183  		log.Debugf("Sending diagnostic request to peer: %s", p)
   184  		go func(p peer.ID) {
   185  			defer wg.Done()
   186  			out, err := d.getDiagnosticFromPeer(ctx, p, pmes)
   187  			if err != nil {
   188  				log.Debugf("Error getting diagnostic from %s: %s", p, err)
   189  				return
   190  			}
   191  			for d := range out {
   192  				respdata <- d
   193  			}
   194  		}(p)
   195  	}
   196  
   197  	go func() {
   198  		wg.Wait()
   199  		close(respdata)
   200  	}()
   201  
   202  	return respdata, nil
   203  }
   204  
   205  func (d *Diagnostics) getDiagnosticFromPeer(ctx context.Context, p peer.ID, pmes *pb.Message) (<-chan *DiagInfo, error) {
   206  	s, err := d.host.NewStream(ProtocolDiag, p)
   207  	if err != nil {
   208  		return nil, err
   209  	}
   210  
   211  	cr := ctxio.NewReader(ctx, s) // ok to use. we defer close stream in this func
   212  	cw := ctxio.NewWriter(ctx, s) // ok to use. we defer close stream in this func
   213  	r := ggio.NewDelimitedReader(cr, inet.MessageSizeMax)
   214  	w := ggio.NewDelimitedWriter(cw)
   215  
   216  	start := time.Now()
   217  
   218  	if err := w.WriteMsg(pmes); err != nil {
   219  		return nil, err
   220  	}
   221  
   222  	out := make(chan *DiagInfo)
   223  	go func() {
   224  
   225  		defer func() {
   226  			close(out)
   227  			s.Close()
   228  			rtt := time.Since(start)
   229  			log.Infof("diagnostic request took: %s", rtt.String())
   230  		}()
   231  
   232  		for {
   233  			rpmes := new(pb.Message)
   234  			if err := r.ReadMsg(rpmes); err != nil {
   235  				log.Debugf("Error reading diagnostic from stream: %s", err)
   236  				return
   237  			}
   238  			if rpmes == nil {
   239  				log.Debug("Got no response back from diag request.")
   240  				return
   241  			}
   242  
   243  			di, err := decodeDiagJson(rpmes.GetData())
   244  			if err != nil {
   245  				log.Debug(err)
   246  				return
   247  			}
   248  
   249  			select {
   250  			case out <- di:
   251  			case <-ctx.Done():
   252  				return
   253  			}
   254  		}
   255  
   256  	}()
   257  
   258  	return out, nil
   259  }
   260  
   261  func newMessage(diagID string) *pb.Message {
   262  	pmes := new(pb.Message)
   263  	pmes.DiagID = proto.String(diagID)
   264  	return pmes
   265  }
   266  
   267  func (d *Diagnostics) HandleMessage(ctx context.Context, s inet.Stream) error {
   268  
   269  	cr := ctxio.NewReader(ctx, s)
   270  	cw := ctxio.NewWriter(ctx, s)
   271  	r := ggio.NewDelimitedReader(cr, inet.MessageSizeMax) // maxsize
   272  	w := ggio.NewDelimitedWriter(cw)
   273  
   274  	// deserialize msg
   275  	pmes := new(pb.Message)
   276  	if err := r.ReadMsg(pmes); err != nil {
   277  		log.Debugf("Failed to decode protobuf message: %v", err)
   278  		return nil
   279  	}
   280  
   281  	// Print out diagnostic
   282  	log.Infof("[peer: %s] Got message from [%s]\n",
   283  		d.self.Pretty(), s.Conn().RemotePeer())
   284  
   285  	// Make sure we havent already handled this request to prevent loops
   286  	if err := d.startDiag(pmes.GetDiagID()); err != nil {
   287  		return nil
   288  	}
   289  
   290  	resp := newMessage(pmes.GetDiagID())
   291  	resp.Data = d.getDiagInfo().Marshal()
   292  	if err := w.WriteMsg(resp); err != nil {
   293  		log.Debugf("Failed to write protobuf message over stream: %s", err)
   294  		return err
   295  	}
   296  
   297  	timeout := pmes.GetTimeoutDuration()
   298  	if timeout < HopTimeoutDecrement {
   299  		return fmt.Errorf("timeout too short: %s", timeout)
   300  	}
   301  	ctx, cancel := context.WithTimeout(ctx, timeout)
   302  	defer cancel()
   303  	pmes.SetTimeoutDuration(timeout - HopTimeoutDecrement)
   304  
   305  	dpeers, err := d.getDiagnosticFromPeers(ctx, d.getPeers(), pmes)
   306  	if err != nil {
   307  		log.Debugf("diagnostic from peers err: %s", err)
   308  		return err
   309  	}
   310  	for b := range dpeers {
   311  		resp := newMessage(pmes.GetDiagID())
   312  		resp.Data = b.Marshal()
   313  		if err := w.WriteMsg(resp); err != nil {
   314  			log.Debugf("Failed to write protobuf message over stream: %s", err)
   315  			return err
   316  		}
   317  	}
   318  
   319  	return nil
   320  }
   321  
   322  func (d *Diagnostics) startDiag(id string) error {
   323  	d.diagLock.Lock()
   324  	_, found := d.diagMap[id]
   325  	if found {
   326  		d.diagLock.Unlock()
   327  		return ErrAlreadyRunning
   328  	}
   329  	d.diagMap[id] = time.Now()
   330  	d.diagLock.Unlock()
   331  	return nil
   332  }
   333  
   334  func (d *Diagnostics) handleNewStream(s inet.Stream) {
   335  	d.HandleMessage(context.Background(), s)
   336  	s.Close()
   337  }