github.com/shyftnetwork/go-empyrean@v1.8.3-0.20191127201940-fbfca9338f04/swarm/network/kademlia/kaddb.go (about)

     1  // Copyright 2016 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package kademlia
    18  
    19  import (
    20  	"encoding/json"
    21  	"fmt"
    22  	"io/ioutil"
    23  	"os"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/ShyftNetwork/go-empyrean/log"
    28  )
    29  
// NodeData is the interface for values that can be converted to and from
// JSON: it combines json.Marshaler and json.Unmarshaler.
type NodeData interface {
	json.Marshaler
	json.Unmarshaler
}
    34  
// NodeRecord is a single kaddb entry: the address and url of a known peer
// together with the timestamps used to schedule connection attempts to it.
type NodeRecord struct {
	Addr  Address          // address of node
	Url   string           // Url, used to connect to node
	After time.Time        // next call after time
	Seen  time.Time        // last connected at time
	Meta  *json.RawMessage // arbitrary metadata saved for a peer

	// node is unexported and therefore not part of the JSON form of the
	// record; findBest skips records whose node is non-nil as already connected.
	node Node
}
    45  
    46  func (self *NodeRecord) setSeen() {
    47  	t := time.Now()
    48  	self.Seen = t
    49  	self.After = t
    50  }
    51  
    52  func (self *NodeRecord) String() string {
    53  	return fmt.Sprintf("<%v>", self.Addr)
    54  }
    55  
// KadDb is the persisted node record database (kaddb). save marshals the
// struct to JSON and load unmarshals it again, so only the exported fields
// (Address and Nodes) end up on disk; the unexported fields are in-memory
// state only.
type KadDb struct {
	// Address is compared against incoming records in add, so that a record
	// carrying this address is never stored.
	Address              Address
	// Nodes holds the rows of node records; add and findOrCreate index it
	// by proximity bin.
	Nodes                [][]*NodeRecord
	// index maps an address to its record, for lookup by address.
	index                map[Address]*NodeRecord
	// cursors holds one position per row of Nodes: add inserts new records
	// at that position and findBest starts scanning the row from it.
	cursors              []int
	// lock is taken by the methods that read or modify the db.
	lock                 sync.RWMutex
	// purgeInterval: findBest removes records not seen for longer than this.
	purgeInterval        time.Duration
	// initialRetryInterval is the lower bound findBest applies to the time
	// elapsed since a record was seen when computing its retry delay.
	initialRetryInterval time.Duration
	// connRetryExp is the factor findBest multiplies that elapsed time by
	// to obtain the delay before the record may be tried again.
	connRetryExp         int
}
    67  
    68  func newKadDb(addr Address, params *KadParams) *KadDb {
    69  	return &KadDb{
    70  		Address:              addr,
    71  		Nodes:                make([][]*NodeRecord, params.MaxProx+1), // overwritten by load
    72  		cursors:              make([]int, params.MaxProx+1),
    73  		index:                make(map[Address]*NodeRecord),
    74  		purgeInterval:        params.PurgeInterval,
    75  		initialRetryInterval: params.InitialRetryInterval,
    76  		connRetryExp:         params.ConnRetryExp,
    77  	}
    78  }
    79  
    80  func (self *KadDb) findOrCreate(index int, a Address, url string) *NodeRecord {
    81  	defer self.lock.Unlock()
    82  	self.lock.Lock()
    83  
    84  	record, found := self.index[a]
    85  	if !found {
    86  		record = &NodeRecord{
    87  			Addr: a,
    88  			Url:  url,
    89  		}
    90  		log.Info(fmt.Sprintf("add new record %v to kaddb", record))
    91  		// insert in kaddb
    92  		self.index[a] = record
    93  		self.Nodes[index] = append(self.Nodes[index], record)
    94  	} else {
    95  		log.Info(fmt.Sprintf("found record %v in kaddb", record))
    96  	}
    97  	// update last seen time
    98  	record.setSeen()
    99  	// update with url in case IP/port changes
   100  	record.Url = url
   101  	return record
   102  }
   103  
   104  // add adds node records to kaddb (persisted node record db)
   105  func (self *KadDb) add(nrs []*NodeRecord, proximityBin func(Address) int) {
   106  	defer self.lock.Unlock()
   107  	self.lock.Lock()
   108  	var n int
   109  	var nodes []*NodeRecord
   110  	for _, node := range nrs {
   111  		_, found := self.index[node.Addr]
   112  		if !found && node.Addr != self.Address {
   113  			node.setSeen()
   114  			self.index[node.Addr] = node
   115  			index := proximityBin(node.Addr)
   116  			dbcursor := self.cursors[index]
   117  			nodes = self.Nodes[index]
   118  			// this is inefficient for allocation, need to just append then shift
   119  			newnodes := make([]*NodeRecord, len(nodes)+1)
   120  			copy(newnodes[:], nodes[:dbcursor])
   121  			newnodes[dbcursor] = node
   122  			copy(newnodes[dbcursor+1:], nodes[dbcursor:])
   123  			log.Trace(fmt.Sprintf("new nodes: %v, nodes: %v", newnodes, nodes))
   124  			self.Nodes[index] = newnodes
   125  			n++
   126  		}
   127  	}
   128  	if n > 0 {
   129  		log.Debug(fmt.Sprintf("%d/%d node records (new/known)", n, len(nrs)))
   130  	}
   131  }
   132  
   133  /*
   134  next return one node record with the highest priority for desired
   135  connection.
   136  This is used to pick candidates for live nodes that are most wanted for
   137  a higly connected low centrality network structure for Swarm which best suits
   138  for a Kademlia-style routing.
   139  
   140  * Starting as naive node with empty db, this implements Kademlia bootstrapping
   141  * As a mature node, it fills short lines. All on demand.
   142  
   143  The candidate is chosen using the following strategy:
   144  We check for missing online nodes in the buckets for 1 upto Max BucketSize rounds.
   145  On each round we proceed from the low to high proximity order buckets.
   146  If the number of active nodes (=connected peers) is < rounds, then start looking
   147  for a known candidate. To determine if there is a candidate to recommend the
   148  kaddb node record database row corresponding to the bucket is checked.
   149  
   150  If the row cursor is on position i, the ith element in the row is chosen.
   151  If the record is scheduled not to be retried before NOW, the next element is taken.
   152  If the record is scheduled to be retried, it is set as checked, scheduled for
   153  checking and is returned. The time of the next check is in X (duration) such that
   154  X = ConnRetryExp * delta where delta is the time past since the last check and
   155  ConnRetryExp is constant obsoletion factor. (Note that when node records are added
   156  from peer messages, they are marked as checked and placed at the cursor, ie.
   157  given priority over older entries). Entries which were checked more than
   158  purgeInterval ago are deleted from the kaddb row. If no candidate is found after
   159  a full round of checking the next bucket up is considered. If no candidate is
   160  found when we reach the maximum-proximity bucket, the next round starts.
   161  
   162  node record a is more favoured to b a > b iff a is a passive node (record of
   163  offline past peer)
   164  |proxBin(a)| < |proxBin(b)|
   165  || (proxBin(a) < proxBin(b) && |proxBin(a)| == |proxBin(b)|)
   166  || (proxBin(a) == proxBin(b) && lastChecked(a) < lastChecked(b))
   167  
   168  
   169  The second argument returned names the first missing slot found
   170  */
   171  func (self *KadDb) findBest(maxBinSize int, binSize func(int) int) (node *NodeRecord, need bool, proxLimit int) {
   172  	// return nil, proxLimit indicates that all buckets are filled
   173  	defer self.lock.Unlock()
   174  	self.lock.Lock()
   175  
   176  	var interval time.Duration
   177  	var found bool
   178  	var purge []bool
   179  	var delta time.Duration
   180  	var cursor int
   181  	var count int
   182  	var after time.Time
   183  
   184  	// iterate over columns maximum bucketsize times
   185  	for rounds := 1; rounds <= maxBinSize; rounds++ {
   186  	ROUND:
   187  		// iterate over rows from PO 0 upto MaxProx
   188  		for po, dbrow := range self.Nodes {
   189  			// if row has rounds connected peers, then take the next
   190  			if binSize(po) >= rounds {
   191  				continue ROUND
   192  			}
   193  			if !need {
   194  				// set proxlimit to the PO where the first missing slot is found
   195  				proxLimit = po
   196  				need = true
   197  			}
   198  			purge = make([]bool, len(dbrow))
   199  
   200  			// there is a missing slot - finding a node to connect to
   201  			// select a node record from the relavant kaddb row (of identical prox order)
   202  		ROW:
   203  			for cursor = self.cursors[po]; !found && count < len(dbrow); cursor = (cursor + 1) % len(dbrow) {
   204  				count++
   205  				node = dbrow[cursor]
   206  
   207  				// skip already connected nodes
   208  				if node.node != nil {
   209  					log.Debug(fmt.Sprintf("kaddb record %v (PO%03d:%d/%d) already connected", node.Addr, po, cursor, len(dbrow)))
   210  					continue ROW
   211  				}
   212  
   213  				// if node is scheduled to connect
   214  				if node.After.After(time.Now()) {
   215  					log.Debug(fmt.Sprintf("kaddb record %v (PO%03d:%d) skipped. seen at %v (%v ago), scheduled at %v", node.Addr, po, cursor, node.Seen, delta, node.After))
   216  					continue ROW
   217  				}
   218  
   219  				delta = time.Since(node.Seen)
   220  				if delta < self.initialRetryInterval {
   221  					delta = self.initialRetryInterval
   222  				}
   223  				if delta > self.purgeInterval {
   224  					// remove node
   225  					purge[cursor] = true
   226  					log.Debug(fmt.Sprintf("kaddb record %v (PO%03d:%d) unreachable since %v. Removed", node.Addr, po, cursor, node.Seen))
   227  					continue ROW
   228  				}
   229  
   230  				log.Debug(fmt.Sprintf("kaddb record %v (PO%03d:%d) ready to be tried. seen at %v (%v ago), scheduled at %v", node.Addr, po, cursor, node.Seen, delta, node.After))
   231  
   232  				// scheduling next check
   233  				interval = delta * time.Duration(self.connRetryExp)
   234  				after = time.Now().Add(interval)
   235  
   236  				log.Debug(fmt.Sprintf("kaddb record %v (PO%03d:%d) selected as candidate connection %v. seen at %v (%v ago), selectable since %v, retry after %v (in %v)", node.Addr, po, cursor, rounds, node.Seen, delta, node.After, after, interval))
   237  				node.After = after
   238  				found = true
   239  			} // ROW
   240  			self.cursors[po] = cursor
   241  			self.delete(po, purge)
   242  			if found {
   243  				return node, need, proxLimit
   244  			}
   245  		} // ROUND
   246  	} // ROUNDS
   247  
   248  	return nil, need, proxLimit
   249  }
   250  
   251  // deletes the noderecords of a kaddb row corresponding to the indexes
   252  // caller must hold the dblock
   253  // the call is unsafe, no index checks
   254  func (self *KadDb) delete(row int, purge []bool) {
   255  	var nodes []*NodeRecord
   256  	dbrow := self.Nodes[row]
   257  	for i, del := range purge {
   258  		if i == self.cursors[row] {
   259  			//reset cursor
   260  			self.cursors[row] = len(nodes)
   261  		}
   262  		// delete the entry to be purged
   263  		if del {
   264  			delete(self.index, dbrow[i].Addr)
   265  			continue
   266  		}
   267  		// otherwise append to new list
   268  		nodes = append(nodes, dbrow[i])
   269  	}
   270  	self.Nodes[row] = nodes
   271  }
   272  
   273  // save persists kaddb on disk (written to file on path in json format.
   274  func (self *KadDb) save(path string, cb func(*NodeRecord, Node)) error {
   275  	defer self.lock.Unlock()
   276  	self.lock.Lock()
   277  
   278  	var n int
   279  
   280  	for _, b := range self.Nodes {
   281  		for _, node := range b {
   282  			n++
   283  			node.After = time.Now()
   284  			node.Seen = time.Now()
   285  			if cb != nil {
   286  				cb(node, node.node)
   287  			}
   288  		}
   289  	}
   290  
   291  	data, err := json.MarshalIndent(self, "", " ")
   292  	if err != nil {
   293  		return err
   294  	}
   295  	err = ioutil.WriteFile(path, data, os.ModePerm)
   296  	if err != nil {
   297  		log.Warn(fmt.Sprintf("unable to save kaddb with %v nodes to %v: %v", n, path, err))
   298  	} else {
   299  		log.Info(fmt.Sprintf("saved kaddb with %v nodes to %v", n, path))
   300  	}
   301  	return err
   302  }
   303  
   304  // Load(path) loads the node record database (kaddb) from file on path.
   305  func (self *KadDb) load(path string, cb func(*NodeRecord, Node) error) (err error) {
   306  	defer self.lock.Unlock()
   307  	self.lock.Lock()
   308  
   309  	var data []byte
   310  	data, err = ioutil.ReadFile(path)
   311  	if err != nil {
   312  		return
   313  	}
   314  
   315  	err = json.Unmarshal(data, self)
   316  	if err != nil {
   317  		return
   318  	}
   319  	var n int
   320  	var purge []bool
   321  	for po, b := range self.Nodes {
   322  		purge = make([]bool, len(b))
   323  	ROW:
   324  		for i, node := range b {
   325  			if cb != nil {
   326  				err = cb(node, node.node)
   327  				if err != nil {
   328  					purge[i] = true
   329  					continue ROW
   330  				}
   331  			}
   332  			n++
   333  			if node.After.IsZero() {
   334  				node.After = time.Now()
   335  			}
   336  			self.index[node.Addr] = node
   337  		}
   338  		self.delete(po, purge)
   339  	}
   340  	log.Info(fmt.Sprintf("loaded kaddb with %v nodes from %v", n, path))
   341  
   342  	return
   343  }
   344  
   345  // accessor for KAD offline db count
   346  func (self *KadDb) count() int {
   347  	defer self.lock.Unlock()
   348  	self.lock.Lock()
   349  	return len(self.index)
   350  }