github.com/core-coin/go-core/v2@v2.1.9/les/costtracker.go (about)

     1  // Copyright 2019 by the Authors
     2  // This file is part of the go-core library.
     3  //
     4  // The go-core library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-core library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-core library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package les
    18  
    19  import (
    20  	"encoding/binary"
    21  	"math"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/core-coin/go-core/v2/xcbdb"
    27  
    28  	"github.com/core-coin/go-core/v2/common/mclock"
    29  	"github.com/core-coin/go-core/v2/les/flowcontrol"
    30  	"github.com/core-coin/go-core/v2/log"
    31  	"github.com/core-coin/go-core/v2/metrics"
    32  	"github.com/core-coin/go-core/v2/xcb"
    33  )
    34  
// makeCostStats enables the collection of request cost statistics during
// operation. When it is true, newCostTracker allocates the per message code
// histogram, updateStats fills it and stop prints it through printStats.
const makeCostStats = false // make request cost statistics during operation
    36  
// Hardcoded per message code estimate tables. Every requestCostTable entry is
// written positionally as {baseCost, reqCost}; the estimate for a request is
// baseCost + amount*reqCost.
var (
	// average request cost estimates based on serving time
	reqAvgTimeCost = requestCostTable{
		GetBlockHeadersMsg:     {150000, 30000},
		GetBlockBodiesMsg:      {0, 700000},
		GetReceiptsMsg:         {0, 1000000},
		GetCodeMsg:             {0, 450000},
		GetProofsV2Msg:         {0, 600000},
		GetHelperTrieProofsMsg: {0, 1000000},
		SendTxV2Msg:            {0, 450000},
		GetTxStatusMsg:         {0, 250000},
	}
	// maximum incoming message size estimates
	// (makeCostList multiplies them by inSizeFactor and the global factor)
	reqMaxInSize = requestCostTable{
		GetBlockHeadersMsg:     {40, 0},
		GetBlockBodiesMsg:      {0, 40},
		GetReceiptsMsg:         {0, 40},
		GetCodeMsg:             {0, 80},
		GetProofsV2Msg:         {0, 80},
		GetHelperTrieProofsMsg: {0, 20},
		SendTxV2Msg:            {0, 16500},
		GetTxStatusMsg:         {0, 50},
	}
	// maximum outgoing message size estimates
	// (makeCostList multiplies them by outSizeFactor and the global factor)
	reqMaxOutSize = requestCostTable{
		GetBlockHeadersMsg:     {0, 556},
		GetBlockBodiesMsg:      {0, 100000},
		GetReceiptsMsg:         {0, 200000},
		GetCodeMsg:             {0, 50000},
		GetProofsV2Msg:         {0, 4000},
		GetHelperTrieProofsMsg: {0, 4000},
		SendTxV2Msg:            {0, 100},
		GetTxStatusMsg:         {0, 100},
	}
	// request amounts that have to fit into the minimum buffer size minBufferMultiplier times
	minBufferReqAmount = map[uint64]uint64{
		GetBlockHeadersMsg:     192,
		GetBlockBodiesMsg:      1,
		GetReceiptsMsg:         1,
		GetCodeMsg:             1,
		GetProofsV2Msg:         1,
		GetHelperTrieProofsMsg: 16,
		SendTxV2Msg:            8,
		GetTxStatusMsg:         64,
	}
	// minBufferMultiplier scales the most expensive minBufferReqAmount sized
	// request when newCostTracker derives the minimum buffer limit.
	minBufferMultiplier = 3
)
    84  
// Tuning parameters of the cost tracker. The gf prefix stands for the global
// cost factor maintained by gfLoop.
const (
	maxCostFactor    = 2    // ratio of maximum and average cost estimates
	bufLimitRatio    = 6000 // fixed bufLimit/MRR ratio
	// gfUsageThreshold is the fraction of the utilization target that the
	// recent usage has to exceed before gfLoop adjusts the factor.
	gfUsageThreshold = 0.5
	// gfUsageTC is the time constant of the exponential decay applied to the
	// recent serving time and recent estimate sums in gfLoop.
	gfUsageTC        = time.Second
	// gfRaiseTC is the time constant used when log(factor) is raised.
	gfRaiseTC        = time.Second * 200
	// gfDropTC is the time constant used when log(factor) is dropped.
	gfDropTC         = time.Second * 50
	// gfDbKey is the database key under which log(factor) is persisted.
	gfDbKey          = "_globalCostFactorV6"
)
    94  
// costTracker is responsible for calculating costs and cost estimates on the
// server side. It continuously updates the global cost factor, which is defined
// as the number of cost units per nanosecond of serving time in a single thread.
// The factor is based on statistics collected while serving requests in
// high-load periods and in practice acts as a one-dimensional request price
// scaling factor over the pre-defined cost estimate table.
//
// The global factor is maintained dynamically on the server side because the
// estimated time cost of each request is fixed (hardcoded), while the machines
// running the server differ widely in configuration. Request serving times
// therefore vary greatly between machines, and even on the same machine they
// may vary greatly with the request pressure.
//
// To limit resources more effectively, the global factor is applied to the
// serving time so that the result stays as close as possible to the estimated
// time cost regardless of whether the server is slow or fast. The total
// recharge is scaled with the global factor as well, so that a fast server can
// serve more requests than estimated and a slow server can reduce the request
// pressure.
//
// Instead of scaling the cost values, the real value of cost units is changed by
// applying the factor to the serving times. This is more convenient because the
// changes in the cost factor can be applied immediately without always notifying
// the clients about the changed cost tables.
type costTracker struct {
	// db stores the persisted logarithm of the global factor under gfDbKey.
	db     xcbdb.Database
	// stopCh carries the stop request to the gfLoop goroutine; the channel
	// sent through it is closed once the factor has been saved.
	stopCh chan chan struct{}

	// inSizeFactor and outSizeFactor convert message sizes into cost; they
	// stay zero unless an ingress/egress limit is configured.
	inSizeFactor  float64
	outSizeFactor float64
	// factor is the current global cost factor, accessed under gfLock once
	// the gfLoop goroutine is running.
	factor        float64
	// utilTarget is config.LightServ scaled by FixedPointMultiplier/100.
	utilTarget    float64
	// minBufLimit is computed by newCostTracker; while it is zero
	// makeCostList does not clamp the request costs.
	minBufLimit   uint64

	// gfLock guards factor and totalRechargeCh.
	gfLock          sync.RWMutex
	// reqInfoCh feeds served request measurements from updateStats to gfLoop.
	reqInfoCh       chan reqInfo
	// totalRechargeCh is the subscriber registered by subscribeTotalRecharge;
	// nil until a subscription is made.
	totalRechargeCh chan uint64

	// stats is a 10 bucket histogram per message code, only allocated when
	// makeCostStats is true.
	stats map[uint64][]uint64 // Used for testing purpose.

	// TestHooks
	testing      bool            // Disable real cost evaluation for testing purpose.
	testCostList RequestCostList // Customized cost table for testing purpose.
}
   138  
   139  // newCostTracker creates a cost tracker and loads the cost factor statistics from the database.
   140  // It also returns the minimum capacity that can be assigned to any peer.
   141  func newCostTracker(db xcbdb.Database, config *xcb.Config) (*costTracker, uint64) {
   142  	utilTarget := float64(config.LightServ) * flowcontrol.FixedPointMultiplier / 100
   143  	ct := &costTracker{
   144  		db:         db,
   145  		stopCh:     make(chan chan struct{}),
   146  		reqInfoCh:  make(chan reqInfo, 100),
   147  		utilTarget: utilTarget,
   148  	}
   149  	if config.LightIngress > 0 {
   150  		ct.inSizeFactor = utilTarget / float64(config.LightIngress)
   151  	}
   152  	if config.LightEgress > 0 {
   153  		ct.outSizeFactor = utilTarget / float64(config.LightEgress)
   154  	}
   155  	if makeCostStats {
   156  		ct.stats = make(map[uint64][]uint64)
   157  		for code := range reqAvgTimeCost {
   158  			ct.stats[code] = make([]uint64, 10)
   159  		}
   160  	}
   161  	ct.gfLoop()
   162  	costList := ct.makeCostList(ct.globalFactor() * 1.25)
   163  	for _, c := range costList {
   164  		amount := minBufferReqAmount[c.MsgCode]
   165  		cost := c.BaseCost + amount*c.ReqCost
   166  		if cost > ct.minBufLimit {
   167  			ct.minBufLimit = cost
   168  		}
   169  	}
   170  	ct.minBufLimit *= uint64(minBufferMultiplier)
   171  	return ct, (ct.minBufLimit-1)/bufLimitRatio + 1
   172  }
   173  
   174  // stop stops the cost tracker and saves the cost factor statistics to the database
   175  func (ct *costTracker) stop() {
   176  	stopCh := make(chan struct{})
   177  	ct.stopCh <- stopCh
   178  	<-stopCh
   179  	if makeCostStats {
   180  		ct.printStats()
   181  	}
   182  }
   183  
   184  // makeCostList returns upper cost estimates based on the hardcoded cost estimate
   185  // tables and the optionally specified incoming/outgoing bandwidth limits
   186  func (ct *costTracker) makeCostList(globalFactor float64) RequestCostList {
   187  	maxCost := func(avgTimeCost, inSize, outSize uint64) uint64 {
   188  		cost := avgTimeCost * maxCostFactor
   189  		inSizeCost := uint64(float64(inSize) * ct.inSizeFactor * globalFactor)
   190  		if inSizeCost > cost {
   191  			cost = inSizeCost
   192  		}
   193  		outSizeCost := uint64(float64(outSize) * ct.outSizeFactor * globalFactor)
   194  		if outSizeCost > cost {
   195  			cost = outSizeCost
   196  		}
   197  		return cost
   198  	}
   199  	var list RequestCostList
   200  	for code, data := range reqAvgTimeCost {
   201  		baseCost := maxCost(data.baseCost, reqMaxInSize[code].baseCost, reqMaxOutSize[code].baseCost)
   202  		reqCost := maxCost(data.reqCost, reqMaxInSize[code].reqCost, reqMaxOutSize[code].reqCost)
   203  		if ct.minBufLimit != 0 {
   204  			// if minBufLimit is set then always enforce maximum request cost <= minBufLimit
   205  			maxCost := baseCost + reqCost*minBufferReqAmount[code]
   206  			if maxCost > ct.minBufLimit {
   207  				mul := 0.999 * float64(ct.minBufLimit) / float64(maxCost)
   208  				baseCost = uint64(float64(baseCost) * mul)
   209  				reqCost = uint64(float64(reqCost) * mul)
   210  			}
   211  		}
   212  
   213  		list = append(list, requestCostListItem{
   214  			MsgCode:  code,
   215  			BaseCost: baseCost,
   216  			ReqCost:  reqCost,
   217  		})
   218  	}
   219  	return list
   220  }
   221  
// reqInfo contains the estimated time cost and the actual serving time of a
// single request. It is the feed source from which costTracker updates the
// global cost factor.
//
// updateStats builds it with a positional literal, so the field order must
// not be changed.
type reqInfo struct {
	// avgTimeCost is the estimated time cost of the request, computed by
	// updateStats from reqAvgTimeCost as baseCost + amount*reqCost.
	avgTimeCost float64

	// servingTime is the time spent on the actual processing of the
	// request, as reported by the caller of updateStats.
	servingTime float64

	// msgCode indicates the type of request.
	msgCode uint64
}
   235  
   236  // gfLoop starts an event loop which updates the global cost factor which is
   237  // calculated as a weighted average of the average estimate / serving time ratio.
   238  // The applied weight equals the serving time if gfUsage is over a threshold,
   239  // zero otherwise. gfUsage is the recent average serving time per time unit in
   240  // an exponential moving window. This ensures that statistics are collected only
   241  // under high-load circumstances where the measured serving times are relevant.
   242  // The total recharge parameter of the flow control system which controls the
   243  // total allowed serving time per second but nominated in cost units, should
   244  // also be scaled with the cost factor and is also updated by this loop.
   245  func (ct *costTracker) gfLoop() {
   246  	var (
   247  		factor, totalRecharge        float64
   248  		gfLog, recentTime, recentAvg float64
   249  
   250  		lastUpdate, expUpdate = mclock.Now(), mclock.Now()
   251  	)
   252  
   253  	// Load historical cost factor statistics from the database.
   254  	data, _ := ct.db.Get([]byte(gfDbKey))
   255  	if len(data) == 8 {
   256  		gfLog = math.Float64frombits(binary.BigEndian.Uint64(data[:]))
   257  	}
   258  	ct.factor = math.Exp(gfLog)
   259  	factor, totalRecharge = ct.factor, ct.utilTarget*ct.factor
   260  
   261  	// In order to perform factor data statistics under the high request pressure,
   262  	// we only adjust factor when recent factor usage beyond the threshold.
   263  	threshold := gfUsageThreshold * float64(gfUsageTC) * ct.utilTarget / flowcontrol.FixedPointMultiplier
   264  
   265  	go func() {
   266  		saveCostFactor := func() {
   267  			var data [8]byte
   268  			binary.BigEndian.PutUint64(data[:], math.Float64bits(gfLog))
   269  			ct.db.Put([]byte(gfDbKey), data[:])
   270  			log.Debug("global cost factor saved", "value", factor)
   271  		}
   272  		saveTicker := time.NewTicker(time.Minute * 10)
   273  		defer saveTicker.Stop()
   274  
   275  		for {
   276  			select {
   277  			case r := <-ct.reqInfoCh:
   278  				relCost := int64(factor * r.servingTime * 100 / r.avgTimeCost) // Convert the value to a percentage form
   279  
   280  				// Record more metrics if we are debugging
   281  				if metrics.EnabledExpensive {
   282  					switch r.msgCode {
   283  					case GetBlockHeadersMsg:
   284  						relativeCostHeaderHistogram.Update(relCost)
   285  					case GetBlockBodiesMsg:
   286  						relativeCostBodyHistogram.Update(relCost)
   287  					case GetReceiptsMsg:
   288  						relativeCostReceiptHistogram.Update(relCost)
   289  					case GetCodeMsg:
   290  						relativeCostCodeHistogram.Update(relCost)
   291  					case GetProofsV2Msg:
   292  						relativeCostProofHistogram.Update(relCost)
   293  					case GetHelperTrieProofsMsg:
   294  						relativeCostHelperProofHistogram.Update(relCost)
   295  					case SendTxV2Msg:
   296  						relativeCostSendTxHistogram.Update(relCost)
   297  					case GetTxStatusMsg:
   298  						relativeCostTxStatusHistogram.Update(relCost)
   299  					}
   300  				}
   301  				// SendTxV2 and GetTxStatus requests are two special cases.
   302  				// All other requests will only put pressure on the database, and
   303  				// the corresponding delay is relatively stable. While these two
   304  				// requests involve txpool query, which is usually unstable.
   305  				//
   306  				// TODO(raisty) fixes this.
   307  				if r.msgCode == SendTxV2Msg || r.msgCode == GetTxStatusMsg {
   308  					continue
   309  				}
   310  				requestServedMeter.Mark(int64(r.servingTime))
   311  				requestServedTimer.Update(time.Duration(r.servingTime))
   312  				requestEstimatedMeter.Mark(int64(r.avgTimeCost / factor))
   313  				requestEstimatedTimer.Update(time.Duration(r.avgTimeCost / factor))
   314  				relativeCostHistogram.Update(relCost)
   315  
   316  				now := mclock.Now()
   317  				dt := float64(now - expUpdate)
   318  				expUpdate = now
   319  				exp := math.Exp(-dt / float64(gfUsageTC))
   320  
   321  				// calculate factor correction until now, based on previous values
   322  				var gfCorr float64
   323  				max := recentTime
   324  				if recentAvg > max {
   325  					max = recentAvg
   326  				}
   327  				// we apply continuous correction when MAX(recentTime, recentAvg) > threshold
   328  				if max > threshold {
   329  					// calculate correction time between last expUpdate and now
   330  					if max*exp >= threshold {
   331  						gfCorr = dt
   332  					} else {
   333  						gfCorr = math.Log(max/threshold) * float64(gfUsageTC)
   334  					}
   335  					// calculate log(factor) correction with the right direction and time constant
   336  					if recentTime > recentAvg {
   337  						// drop factor if actual serving times are larger than average estimates
   338  						gfCorr /= -float64(gfDropTC)
   339  					} else {
   340  						// raise factor if actual serving times are smaller than average estimates
   341  						gfCorr /= float64(gfRaiseTC)
   342  					}
   343  				}
   344  				// update recent cost values with current request
   345  				recentTime = recentTime*exp + r.servingTime
   346  				recentAvg = recentAvg*exp + r.avgTimeCost/factor
   347  
   348  				if gfCorr != 0 {
   349  					// Apply the correction to factor
   350  					gfLog += gfCorr
   351  					factor = math.Exp(gfLog)
   352  					// Notify outside modules the new factor and totalRecharge.
   353  					if time.Duration(now-lastUpdate) > time.Second {
   354  						totalRecharge, lastUpdate = ct.utilTarget*factor, now
   355  						ct.gfLock.Lock()
   356  						ct.factor = factor
   357  						ch := ct.totalRechargeCh
   358  						ct.gfLock.Unlock()
   359  						if ch != nil {
   360  							select {
   361  							case ct.totalRechargeCh <- uint64(totalRecharge):
   362  							default:
   363  							}
   364  						}
   365  						globalFactorGauge.Update(int64(1000 * factor))
   366  						log.Debug("global cost factor updated", "factor", factor)
   367  					}
   368  				}
   369  				recentServedGauge.Update(int64(recentTime))
   370  				recentEstimatedGauge.Update(int64(recentAvg))
   371  
   372  			case <-saveTicker.C:
   373  				saveCostFactor()
   374  
   375  			case stopCh := <-ct.stopCh:
   376  				saveCostFactor()
   377  				close(stopCh)
   378  				return
   379  			}
   380  		}
   381  	}()
   382  }
   383  
   384  // globalFactor returns the current value of the global cost factor
   385  func (ct *costTracker) globalFactor() float64 {
   386  	ct.gfLock.RLock()
   387  	defer ct.gfLock.RUnlock()
   388  
   389  	return ct.factor
   390  }
   391  
   392  // totalRecharge returns the current total recharge parameter which is used by
   393  // flowcontrol.ClientManager and is scaled by the global cost factor
   394  func (ct *costTracker) totalRecharge() uint64 {
   395  	ct.gfLock.RLock()
   396  	defer ct.gfLock.RUnlock()
   397  
   398  	return uint64(ct.factor * ct.utilTarget)
   399  }
   400  
   401  // subscribeTotalRecharge returns all future updates to the total recharge value
   402  // through a channel and also returns the current value
   403  func (ct *costTracker) subscribeTotalRecharge(ch chan uint64) uint64 {
   404  	ct.gfLock.Lock()
   405  	defer ct.gfLock.Unlock()
   406  
   407  	ct.totalRechargeCh = ch
   408  	return uint64(ct.factor * ct.utilTarget)
   409  }
   410  
   411  // updateStats updates the global cost factor and (if enabled) the real cost vs.
   412  // average estimate statistics
   413  func (ct *costTracker) updateStats(code, amount, servingTime, realCost uint64) {
   414  	avg := reqAvgTimeCost[code]
   415  	avgTimeCost := avg.baseCost + amount*avg.reqCost
   416  	select {
   417  	case ct.reqInfoCh <- reqInfo{float64(avgTimeCost), float64(servingTime), code}:
   418  	default:
   419  	}
   420  	if makeCostStats {
   421  		realCost <<= 4
   422  		l := 0
   423  		for l < 9 && realCost > avgTimeCost {
   424  			l++
   425  			realCost >>= 1
   426  		}
   427  		atomic.AddUint64(&ct.stats[code][l], 1)
   428  	}
   429  }
   430  
   431  // realCost calculates the final cost of a request based on actual serving time,
   432  // incoming and outgoing message size
   433  //
   434  // Note: message size is only taken into account if bandwidth limitation is applied
   435  // and the cost based on either message size is greater than the cost based on
   436  // serving time. A maximum of the three costs is applied instead of their sum
   437  // because the three limited resources (serving thread time and i/o bandwidth) can
   438  // also be maxed out simultaneously.
   439  func (ct *costTracker) realCost(servingTime uint64, inSize, outSize uint32) uint64 {
   440  	cost := float64(servingTime)
   441  	inSizeCost := float64(inSize) * ct.inSizeFactor
   442  	if inSizeCost > cost {
   443  		cost = inSizeCost
   444  	}
   445  	outSizeCost := float64(outSize) * ct.outSizeFactor
   446  	if outSizeCost > cost {
   447  		cost = outSizeCost
   448  	}
   449  	return uint64(cost * ct.globalFactor())
   450  }
   451  
   452  // printStats prints the distribution of real request cost relative to the average estimates
   453  func (ct *costTracker) printStats() {
   454  	if ct.stats == nil {
   455  		return
   456  	}
   457  	for code, arr := range ct.stats {
   458  		log.Info("Request cost statistics", "code", code, "1/16", arr[0], "1/8", arr[1], "1/4", arr[2], "1/2", arr[3], "1", arr[4], "2", arr[5], "4", arr[6], "8", arr[7], "16", arr[8], ">16", arr[9])
   459  	}
   460  }
   461  
   462  type (
   463  	// requestCostTable assigns a cost estimate function to each request type
   464  	// which is a linear function of the requested amount
   465  	// (cost = baseCost + reqCost * amount)
   466  	requestCostTable map[uint64]*requestCosts
   467  	requestCosts     struct {
   468  		baseCost, reqCost uint64
   469  	}
   470  
   471  	// RequestCostList is a list representation of request costs which is used for
   472  	// database storage and communication through the network
   473  	RequestCostList     []requestCostListItem
   474  	requestCostListItem struct {
   475  		MsgCode, BaseCost, ReqCost uint64
   476  	}
   477  )
   478  
   479  // getMaxCost calculates the estimated cost for a given request type and amount
   480  func (table requestCostTable) getMaxCost(code, amount uint64) uint64 {
   481  	costs := table[code]
   482  	return costs.baseCost + amount*costs.reqCost
   483  }
   484  
   485  // decode converts a cost list to a cost table
   486  func (list RequestCostList) decode(protocolLength uint64) requestCostTable {
   487  	table := make(requestCostTable)
   488  	for _, e := range list {
   489  		if e.MsgCode < protocolLength {
   490  			table[e.MsgCode] = &requestCosts{
   491  				baseCost: e.BaseCost,
   492  				reqCost:  e.ReqCost,
   493  			}
   494  		}
   495  	}
   496  	return table
   497  }
   498  
   499  // testCostList returns a dummy request cost list used by tests
   500  func testCostList(testCost uint64) RequestCostList {
   501  	cl := make(RequestCostList, len(reqAvgTimeCost))
   502  	var max uint64
   503  	for code := range reqAvgTimeCost {
   504  		if code > max {
   505  			max = code
   506  		}
   507  	}
   508  	i := 0
   509  	for code := uint64(0); code <= max; code++ {
   510  		if _, ok := reqAvgTimeCost[code]; ok {
   511  			cl[i].MsgCode = code
   512  			cl[i].BaseCost = testCost
   513  			cl[i].ReqCost = 0
   514  			i++
   515  		}
   516  	}
   517  	return cl
   518  }