github.com/core-coin/go-core/v2@v2.1.9/les/costtracker.go

// Copyright 2019 by the Authors
// This file is part of the go-core library.
//
// The go-core library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-core library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-core library. If not, see <http://www.gnu.org/licenses/>.

package les

import (
	"encoding/binary"
	"math"
	"sync"
	"sync/atomic"
	"time"

	"github.com/core-coin/go-core/v2/xcbdb"

	"github.com/core-coin/go-core/v2/common/mclock"
	"github.com/core-coin/go-core/v2/les/flowcontrol"
	"github.com/core-coin/go-core/v2/log"
	"github.com/core-coin/go-core/v2/metrics"
	"github.com/core-coin/go-core/v2/xcb"
)

const makeCostStats = false // make request cost statistics during operation

var (
	// average request cost estimates based on serving time
	reqAvgTimeCost = requestCostTable{
		GetBlockHeadersMsg:     {150000, 30000},
		GetBlockBodiesMsg:      {0, 700000},
		GetReceiptsMsg:         {0, 1000000},
		GetCodeMsg:             {0, 450000},
		GetProofsV2Msg:         {0, 600000},
		GetHelperTrieProofsMsg: {0, 1000000},
		SendTxV2Msg:            {0, 450000},
		GetTxStatusMsg:         {0, 250000},
	}
	// maximum incoming message size estimates
	reqMaxInSize = requestCostTable{
		GetBlockHeadersMsg:     {40, 0},
		GetBlockBodiesMsg:      {0, 40},
		GetReceiptsMsg:         {0, 40},
		GetCodeMsg:             {0, 80},
		GetProofsV2Msg:         {0, 80},
		GetHelperTrieProofsMsg: {0, 20},
		SendTxV2Msg:            {0, 16500},
		GetTxStatusMsg:         {0, 50},
	}
	// maximum outgoing message size estimates
	reqMaxOutSize = requestCostTable{
		GetBlockHeadersMsg:     {0, 556},
		GetBlockBodiesMsg:      {0, 100000},
		GetReceiptsMsg:         {0, 200000},
		GetCodeMsg:             {0, 50000},
		GetProofsV2Msg:         {0, 4000},
		GetHelperTrieProofsMsg: {0, 4000},
		SendTxV2Msg:            {0, 100},
		GetTxStatusMsg:         {0, 100},
	}
	// request amounts that have to fit into the minimum buffer size minBufferMultiplier times
	minBufferReqAmount = map[uint64]uint64{
		GetBlockHeadersMsg:     192,
		GetBlockBodiesMsg:      1,
		GetReceiptsMsg:         1,
		GetCodeMsg:             1,
		GetProofsV2Msg:         1,
		GetHelperTrieProofsMsg: 16,
		SendTxV2Msg:            8,
		GetTxStatusMsg:         64,
	}
	minBufferMultiplier = 3
)

const (
	maxCostFactor    = 2    // ratio of maximum and average cost estimates
	bufLimitRatio    = 6000 // fixed bufLimit/MRR ratio
	gfUsageThreshold = 0.5
	gfUsageTC        = time.Second
	gfRaiseTC        = time.Second * 200
	gfDropTC         = time.Second * 50
	gfDbKey          = "_globalCostFactorV6"
)

// costTracker is responsible for calculating costs and cost estimates on the
// server side. It continuously updates the global cost factor which is defined
// as the number of cost units per nanosecond of serving time in a single thread.
// It is based on statistics collected during serving requests in high-load periods
// and practically acts as a one-dimensional request price scaling factor over the
// pre-defined cost estimate table.
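//
// As a rough illustration (the numbers here are hypothetical, not measured
// values): with a global cost factor of 2 cost units per nanosecond, a request
// that takes 300000 ns of single-threaded serving time is charged
// 2 * 300000 = 600000 cost units.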
//
// The reason for maintaining the global factor dynamically on the server side
// is that the estimated time cost of each request is fixed (hardcoded), while
// the hardware running the server differs from machine to machine, so the
// actual serving time of a request varies greatly between machines. Even on
// the same machine the serving time varies with the current request pressure.
//
// In order to limit resources more effectively, we apply the global factor to
// the serving time so that the result is as close as possible to the estimated
// time cost, regardless of whether the server is slow or fast. We also scale
// totalRecharge with the global factor so that a fast server can serve more
// requests than estimated and a slow server can reduce its request pressure.
//
// Instead of scaling the cost values, the real value of cost units is changed by
// applying the factor to the serving times. This is more convenient because the
// changes in the cost factor can be applied immediately without always notifying
// the clients about the changed cost tables.
type costTracker struct {
	db     xcbdb.Database
	stopCh chan chan struct{}

	inSizeFactor  float64
	outSizeFactor float64
	factor        float64
	utilTarget    float64
	minBufLimit   uint64

	gfLock          sync.RWMutex
	reqInfoCh       chan reqInfo
	totalRechargeCh chan uint64

	stats map[uint64][]uint64 // Used for testing purposes.

	// TestHooks
	testing      bool            // Disable real cost evaluation for testing purposes.
	testCostList RequestCostList // Customized cost table for testing purposes.
}

// newCostTracker creates a cost tracker and loads the cost factor statistics from the database.
// It also returns the minimum capacity that can be assigned to any peer.
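// The returned minimum capacity is derived from the computed minBufLimit as
// (minBufLimit-1)/bufLimitRatio + 1, i.e. the smallest capacity whose buffer
// limit (capacity times the fixed bufLimitRatio) still covers minBufLimit.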
func newCostTracker(db xcbdb.Database, config *xcb.Config) (*costTracker, uint64) {
	utilTarget := float64(config.LightServ) * flowcontrol.FixedPointMultiplier / 100
	ct := &costTracker{
		db:         db,
		stopCh:     make(chan chan struct{}),
		reqInfoCh:  make(chan reqInfo, 100),
		utilTarget: utilTarget,
	}
	if config.LightIngress > 0 {
		ct.inSizeFactor = utilTarget / float64(config.LightIngress)
	}
	if config.LightEgress > 0 {
		ct.outSizeFactor = utilTarget / float64(config.LightEgress)
	}
	if makeCostStats {
		ct.stats = make(map[uint64][]uint64)
		for code := range reqAvgTimeCost {
			ct.stats[code] = make([]uint64, 10)
		}
	}
	ct.gfLoop()
	costList := ct.makeCostList(ct.globalFactor() * 1.25)
	for _, c := range costList {
		amount := minBufferReqAmount[c.MsgCode]
		cost := c.BaseCost + amount*c.ReqCost
		if cost > ct.minBufLimit {
			ct.minBufLimit = cost
		}
	}
	ct.minBufLimit *= uint64(minBufferMultiplier)
	return ct, (ct.minBufLimit-1)/bufLimitRatio + 1
}

// stop stops the cost tracker and saves the cost factor statistics to the database
func (ct *costTracker) stop() {
	stopCh := make(chan struct{})
	ct.stopCh <- stopCh
	<-stopCh
	if makeCostStats {
		ct.printStats()
	}
}

// makeCostList returns upper cost estimates based on the hardcoded cost estimate
// tables and the optionally specified incoming/outgoing bandwidth limits
func (ct *costTracker) makeCostList(globalFactor float64) RequestCostList {
	maxCost := func(avgTimeCost, inSize, outSize uint64) uint64 {
		cost := avgTimeCost * maxCostFactor
		inSizeCost := uint64(float64(inSize) * ct.inSizeFactor * globalFactor)
		if inSizeCost > cost {
			cost = inSizeCost
		}
		outSizeCost := uint64(float64(outSize) * ct.outSizeFactor * globalFactor)
		if outSizeCost > cost {
			cost = outSizeCost
		}
		return cost
	}
	var list RequestCostList
	for code, data := range reqAvgTimeCost {
		baseCost := maxCost(data.baseCost, reqMaxInSize[code].baseCost, reqMaxOutSize[code].baseCost)
		reqCost := maxCost(data.reqCost, reqMaxInSize[code].reqCost, reqMaxOutSize[code].reqCost)
		if ct.minBufLimit != 0 {
			// if minBufLimit is set then always enforce maximum request cost <= minBufLimit
			maxCost := baseCost + reqCost*minBufferReqAmount[code]
			if maxCost > ct.minBufLimit {
				mul := 0.999 * float64(ct.minBufLimit) / float64(maxCost)
				baseCost = uint64(float64(baseCost) * mul)
				reqCost = uint64(float64(reqCost) * mul)
			}
		}

		list = append(list, requestCostListItem{
			MsgCode:  code,
			BaseCost: baseCost,
			ReqCost:  reqCost,
		})
	}
	return list
}

// reqInfo contains the estimated time cost and the actual request serving time
// which acts as a feed source to update the factor maintained by costTracker.
type reqInfo struct {
	// avgTimeCost is the estimated time cost corresponding to maxCostTable.
	avgTimeCost float64

	// servingTime is the CPU time corresponding to the actual processing of
	// the request.
	servingTime float64

	// msgCode indicates the type of request.
	msgCode uint64
}

// gfLoop starts an event loop which updates the global cost factor, calculated
// as a weighted average of the average estimate / serving time ratio. The
// applied weight equals the serving time if gfUsage is over a threshold and
// zero otherwise. gfUsage is the recent average serving time per time unit in
// an exponential moving window. This ensures that statistics are collected only
// under high-load circumstances where the measured serving times are relevant.
// The total recharge parameter of the flow control system, which controls the
// total allowed serving time per second but is denominated in cost units,
// should also be scaled with the cost factor and is therefore also updated by
// this loop.
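//
// The factor is maintained in logarithmic form (gfLog): while the recent usage
// is above the threshold, log(factor) is nudged by a correction proportional to
// the elapsed time, divided by gfRaiseTC when measured serving times are below
// the estimates and by gfDropTC (with a negative sign) when they are above, so
// raising the factor is roughly four times slower than dropping it.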
func (ct *costTracker) gfLoop() {
	var (
		factor, totalRecharge        float64
		gfLog, recentTime, recentAvg float64

		lastUpdate, expUpdate = mclock.Now(), mclock.Now()
	)

	// Load historical cost factor statistics from the database.
	data, _ := ct.db.Get([]byte(gfDbKey))
	if len(data) == 8 {
		gfLog = math.Float64frombits(binary.BigEndian.Uint64(data[:]))
	}
	ct.factor = math.Exp(gfLog)
	factor, totalRecharge = ct.factor, ct.utilTarget*ct.factor

	// In order to collect factor statistics under high request pressure,
	// we only adjust the factor when the recent factor usage is above the threshold.
	threshold := gfUsageThreshold * float64(gfUsageTC) * ct.utilTarget / flowcontrol.FixedPointMultiplier

	go func() {
		saveCostFactor := func() {
			var data [8]byte
			binary.BigEndian.PutUint64(data[:], math.Float64bits(gfLog))
			ct.db.Put([]byte(gfDbKey), data[:])
			log.Debug("global cost factor saved", "value", factor)
		}
		saveTicker := time.NewTicker(time.Minute * 10)
		defer saveTicker.Stop()

		for {
			select {
			case r := <-ct.reqInfoCh:
				relCost := int64(factor * r.servingTime * 100 / r.avgTimeCost) // Convert the value to a percentage form

				// Record more metrics if we are debugging
				if metrics.EnabledExpensive {
					switch r.msgCode {
					case GetBlockHeadersMsg:
						relativeCostHeaderHistogram.Update(relCost)
					case GetBlockBodiesMsg:
						relativeCostBodyHistogram.Update(relCost)
					case GetReceiptsMsg:
						relativeCostReceiptHistogram.Update(relCost)
					case GetCodeMsg:
						relativeCostCodeHistogram.Update(relCost)
					case GetProofsV2Msg:
						relativeCostProofHistogram.Update(relCost)
					case GetHelperTrieProofsMsg:
						relativeCostHelperProofHistogram.Update(relCost)
					case SendTxV2Msg:
						relativeCostSendTxHistogram.Update(relCost)
					case GetTxStatusMsg:
						relativeCostTxStatusHistogram.Update(relCost)
					}
				}
				// SendTxV2 and GetTxStatus requests are two special cases.
				// All other requests only put pressure on the database, and
				// the corresponding delay is relatively stable, while these two
				// requests involve a txpool query, which is usually unstable.
				//
				// TODO(raisty) fix this.
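				//
				// Skipping them here means they contribute neither to the
				// served/estimated metrics nor to the cost factor correction
				// below.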
				if r.msgCode == SendTxV2Msg || r.msgCode == GetTxStatusMsg {
					continue
				}
				requestServedMeter.Mark(int64(r.servingTime))
				requestServedTimer.Update(time.Duration(r.servingTime))
				requestEstimatedMeter.Mark(int64(r.avgTimeCost / factor))
				requestEstimatedTimer.Update(time.Duration(r.avgTimeCost / factor))
				relativeCostHistogram.Update(relCost)

				now := mclock.Now()
				dt := float64(now - expUpdate)
				expUpdate = now
				exp := math.Exp(-dt / float64(gfUsageTC))

				// calculate factor correction until now, based on previous values
				var gfCorr float64
				max := recentTime
				if recentAvg > max {
					max = recentAvg
				}
				// we apply continuous correction when MAX(recentTime, recentAvg) > threshold
				if max > threshold {
					// calculate correction time between last expUpdate and now
					if max*exp >= threshold {
						gfCorr = dt
					} else {
						gfCorr = math.Log(max/threshold) * float64(gfUsageTC)
					}
					// calculate log(factor) correction with the right direction and time constant
					if recentTime > recentAvg {
						// drop factor if actual serving times are larger than average estimates
						gfCorr /= -float64(gfDropTC)
					} else {
						// raise factor if actual serving times are smaller than average estimates
						gfCorr /= float64(gfRaiseTC)
					}
				}
				// update recent cost values with the current request
				recentTime = recentTime*exp + r.servingTime
				recentAvg = recentAvg*exp + r.avgTimeCost/factor

				if gfCorr != 0 {
					// Apply the correction to the factor
					gfLog += gfCorr
					factor = math.Exp(gfLog)
					// Notify outside modules of the new factor and totalRecharge.
					if time.Duration(now-lastUpdate) > time.Second {
						totalRecharge, lastUpdate = ct.utilTarget*factor, now
						ct.gfLock.Lock()
						ct.factor = factor
						ch := ct.totalRechargeCh
						ct.gfLock.Unlock()
						if ch != nil {
							select {
							case ct.totalRechargeCh <- uint64(totalRecharge):
							default:
							}
						}
						globalFactorGauge.Update(int64(1000 * factor))
						log.Debug("global cost factor updated", "factor", factor)
					}
				}
				recentServedGauge.Update(int64(recentTime))
				recentEstimatedGauge.Update(int64(recentAvg))

			case <-saveTicker.C:
				saveCostFactor()

			case stopCh := <-ct.stopCh:
				saveCostFactor()
				close(stopCh)
				return
			}
		}
	}()
}

// globalFactor returns the current value of the global cost factor
func (ct *costTracker) globalFactor() float64 {
	ct.gfLock.RLock()
	defer ct.gfLock.RUnlock()

	return ct.factor
}

// totalRecharge returns the current total recharge parameter which is used by
// flowcontrol.ClientManager and is scaled by the global cost factor
func (ct *costTracker) totalRecharge() uint64 {
	ct.gfLock.RLock()
	defer ct.gfLock.RUnlock()

	return uint64(ct.factor * ct.utilTarget)
}

// subscribeTotalRecharge returns all future updates to the total recharge value
// through a channel and also returns the current value
func (ct *costTracker) subscribeTotalRecharge(ch chan uint64) uint64 {
	ct.gfLock.Lock()
	defer ct.gfLock.Unlock()

	ct.totalRechargeCh = ch
	return uint64(ct.factor * ct.utilTarget)
}

// updateStats updates the global cost factor and (if enabled) the real cost vs.
// average estimate statistics.
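// When makeCostStats is enabled, each request is also counted in one of ten
// buckets according to the ratio of its real cost to the average estimate:
// bucket 0 collects ratios up to 1/16, each following bucket doubles the upper
// bound, and the last bucket collects everything above 16x (see printStats).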
func (ct *costTracker) updateStats(code, amount, servingTime, realCost uint64) {
	avg := reqAvgTimeCost[code]
	avgTimeCost := avg.baseCost + amount*avg.reqCost
	select {
	case ct.reqInfoCh <- reqInfo{float64(avgTimeCost), float64(servingTime), code}:
	default:
	}
	if makeCostStats {
		realCost <<= 4
		l := 0
		for l < 9 && realCost > avgTimeCost {
			l++
			realCost >>= 1
		}
		atomic.AddUint64(&ct.stats[code][l], 1)
	}
}

// realCost calculates the final cost of a request based on actual serving time,
// incoming and outgoing message size
//
// Note: message size is only taken into account if bandwidth limitation is applied
// and the cost based on either message size is greater than the cost based on
// serving time. A maximum of the three costs is applied instead of their sum
// because the three limited resources (serving thread time and i/o bandwidth) can
// also be maxed out simultaneously.
func (ct *costTracker) realCost(servingTime uint64, inSize, outSize uint32) uint64 {
	cost := float64(servingTime)
	inSizeCost := float64(inSize) * ct.inSizeFactor
	if inSizeCost > cost {
		cost = inSizeCost
	}
	outSizeCost := float64(outSize) * ct.outSizeFactor
	if outSizeCost > cost {
		cost = outSizeCost
	}
	return uint64(cost * ct.globalFactor())
}

// printStats prints the distribution of real request cost relative to the average estimates
func (ct *costTracker) printStats() {
	if ct.stats == nil {
		return
	}
	for code, arr := range ct.stats {
		log.Info("Request cost statistics", "code", code, "1/16", arr[0], "1/8", arr[1], "1/4", arr[2], "1/2", arr[3], "1", arr[4], "2", arr[5], "4", arr[6], "8", arr[7], "16", arr[8], ">16", arr[9])
	}
}

type (
	// requestCostTable assigns a cost estimate function to each request type
	// which is a linear function of the requested amount
	// (cost = baseCost + reqCost * amount)
	requestCostTable map[uint64]*requestCosts
	requestCosts     struct {
		baseCost, reqCost uint64
	}

	// RequestCostList is a list representation of request costs which is used for
	// database storage and communication through the network
	RequestCostList     []requestCostListItem
	requestCostListItem struct {
		MsgCode, BaseCost, ReqCost uint64
	}
)

// getMaxCost calculates the estimated cost for a given request type and amount
func (table requestCostTable) getMaxCost(code, amount uint64) uint64 {
	costs := table[code]
	return costs.baseCost + amount*costs.reqCost
}

// decode converts a cost list to a cost table
func (list RequestCostList) decode(protocolLength uint64) requestCostTable {
	table := make(requestCostTable)
	for _, e := range list {
		if e.MsgCode < protocolLength {
			table[e.MsgCode] = &requestCosts{
				baseCost: e.BaseCost,
				reqCost:  e.ReqCost,
			}
		}
	}
	return table
}

// testCostList returns a dummy request cost list used by tests
func testCostList(testCost uint64) RequestCostList {
	cl := make(RequestCostList, len(reqAvgTimeCost))
	var max uint64
	for code := range reqAvgTimeCost {
		if code > max {
			max = code
		}
	}
	i := 0
	for code := uint64(0); code <= max; code++ {
		if _, ok := reqAvgTimeCost[code]; ok {
			cl[i].MsgCode = code
			cl[i].BaseCost = testCost
			cl[i].ReqCost = 0
			i++
		}
	}
	return cl
}