github.com/braveheart12/insolar-09-08-19@v0.8.7/ledger/heavyclient/heavy_client.go (about) 1 /* 2 * Copyright 2019 Insolar Technologies 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package heavyclient 18 19 import ( 20 "context" 21 "sync" 22 "time" 23 24 "github.com/insolar/insolar/configuration" 25 "github.com/insolar/insolar/core" 26 "github.com/insolar/insolar/core/reply" 27 "github.com/insolar/insolar/instrumentation/inslogger" 28 "github.com/insolar/insolar/instrumentation/insmetrics" 29 "github.com/insolar/insolar/ledger/storage" 30 "github.com/insolar/insolar/utils/backoff" 31 "github.com/pkg/errors" 32 "go.opencensus.io/stats" 33 ) 34 35 // Options contains heavy client configuration params. 36 type Options struct { 37 SyncMessageLimit int 38 PulsesDeltaLimit int 39 BackoffConf configuration.Backoff 40 } 41 42 // JetClient heavy replication client. Replicates records for one jet. 43 type JetClient struct { 44 bus core.MessageBus 45 pulseStorage core.PulseStorage 46 replicaStorage storage.ReplicaStorage 47 pulseTracker storage.PulseTracker 48 cleaner storage.Cleaner 49 db storage.DBContext 50 51 opts Options 52 53 // life cycle control 54 // 55 startOnce sync.Once 56 cancel context.CancelFunc 57 signal chan struct{} 58 // syncdone closes when syncloop is gracefully finished 59 syncdone chan struct{} 60 61 // state: 62 jetID core.RecordID 63 muPulses sync.Mutex 64 leftPulses []core.PulseNumber 65 syncbackoff *backoff.Backoff 66 } 67 68 // NewJetClient heavy replication client constructor. 69 // 70 // First argument defines what jet it serve. 71 func NewJetClient( 72 replicaStorage storage.ReplicaStorage, 73 mb core.MessageBus, 74 pulseStorage core.PulseStorage, 75 pulseTracker storage.PulseTracker, 76 cleaner storage.Cleaner, 77 db storage.DBContext, 78 jetID core.RecordID, 79 opts Options, 80 ) *JetClient { 81 jsc := &JetClient{ 82 bus: mb, 83 pulseStorage: pulseStorage, 84 replicaStorage: replicaStorage, 85 pulseTracker: pulseTracker, 86 cleaner: cleaner, 87 db: db, 88 jetID: jetID, 89 syncbackoff: backoffFromConfig(opts.BackoffConf), 90 signal: make(chan struct{}, 1), 91 syncdone: make(chan struct{}), 92 opts: opts, 93 } 94 return jsc 95 } 96 97 // should be called from protected by mutex code 98 func (c *JetClient) updateLeftPulsesMetrics(ctx context.Context) { 99 // instrumentation 100 var pn core.PulseNumber 101 if len(c.leftPulses) > 0 { 102 pn = c.leftPulses[0] 103 } 104 ctx = insmetrics.InsertTag(ctx, tagJet, c.jetID.DebugString()) 105 stats.Record(ctx, 106 statUnsyncedPulsesCount.M(int64(len(c.leftPulses))), 107 statFirstUnsyncedPulse.M(int64(pn)), 108 ) 109 } 110 111 // addPulses add pulse numbers for syncing. 112 func (c *JetClient) addPulses(ctx context.Context, pns []core.PulseNumber) { 113 c.muPulses.Lock() 114 c.leftPulses = append(c.leftPulses, pns...) 115 116 if err := c.replicaStorage.SetSyncClientJetPulses(ctx, c.jetID, c.leftPulses); err != nil { 117 inslogger.FromContext(ctx).Errorf( 118 "attempt to persist jet sync state failed: jetID=%v: %v", c.jetID, err.Error()) 119 } 120 121 c.updateLeftPulsesMetrics(ctx) 122 c.muPulses.Unlock() 123 } 124 125 func (c *JetClient) pulsesLeft() int { 126 c.muPulses.Lock() 127 defer c.muPulses.Unlock() 128 return len(c.leftPulses) 129 } 130 131 // unshiftPulse removes and returns pulse number from head of processing queue. 132 func (c *JetClient) unshiftPulse(ctx context.Context) *core.PulseNumber { 133 c.muPulses.Lock() 134 defer c.muPulses.Unlock() 135 136 if len(c.leftPulses) == 0 { 137 return nil 138 } 139 result := c.leftPulses[0] 140 141 // shift array elements on one position to left 142 shifted := c.leftPulses[:len(c.leftPulses)-1] 143 copy(shifted, c.leftPulses[1:]) 144 c.leftPulses = shifted 145 146 if err := c.replicaStorage.SetSyncClientJetPulses(ctx, c.jetID, c.leftPulses); err != nil { 147 inslogger.FromContext(ctx).Errorf( 148 "attempt to persist jet sync state failed: jetID=%v: %v", c.jetID, err.Error()) 149 } 150 151 c.updateLeftPulsesMetrics(ctx) 152 return &result 153 } 154 155 func (c *JetClient) nextPulseNumber() (core.PulseNumber, bool) { 156 c.muPulses.Lock() 157 defer c.muPulses.Unlock() 158 159 if len(c.leftPulses) == 0 { 160 return 0, false 161 } 162 return c.leftPulses[0], true 163 } 164 165 func (c *JetClient) runOnce(ctx context.Context) { 166 // retrydelay = m.syncbackoff.ForAttempt(attempt) 167 c.startOnce.Do(func() { 168 // TODO: reset TraceID from context, or just don't use context? 169 // (TraceID not meaningful in async sync loop) 170 ctx, cancel := context.WithCancel(context.Background()) 171 c.cancel = cancel 172 go c.syncloop(ctx) 173 }) 174 } 175 176 func (c *JetClient) syncloop(ctx context.Context) { 177 inslog := inslogger.FromContext(ctx) 178 defer close(c.syncdone) 179 180 var ( 181 syncPN core.PulseNumber 182 hasNext bool 183 retrydelay time.Duration 184 ) 185 186 finishpulse := func() { 187 _ = c.unshiftPulse(ctx) 188 c.syncbackoff.Reset() 189 retrydelay = 0 190 } 191 192 for { 193 select { 194 case <-time.After(retrydelay): 195 // for first try delay should be zero 196 case <-ctx.Done(): 197 if c.pulsesLeft() == 0 { 198 // got cancel signal and have nothing to do 199 return 200 } 201 // client in canceled state signal but has smth to do 202 } 203 204 for { 205 // if we have pulses to sync, process it 206 syncPN, hasNext = c.nextPulseNumber() 207 if hasNext { 208 inslog.Debugf("synchronization next sync pulse num: %v (left=%v)", syncPN, c.leftPulses) 209 break 210 } 211 212 inslog.Debug("synchronization waiting signal what new pulse happens") 213 _, ok := <-c.signal 214 if !ok { 215 inslog.Info("stop is called, so we are should just stop syncronization loop") 216 return 217 } 218 } 219 220 if isPulseNumberOutdated(ctx, c.pulseTracker, c.pulseStorage, syncPN, c.opts.PulsesDeltaLimit) { 221 inslog.Infof("pulse %v on jet %v is outdated, skip it", syncPN, c.jetID) 222 finishpulse() 223 continue 224 } 225 226 inslog.Infof("start synchronization to heavy for pulse %v", syncPN) 227 228 shouldretry := false 229 isretry := c.syncbackoff.Attempt() > 0 230 231 syncerr := c.HeavySync(ctx, syncPN, isretry) 232 if syncerr != nil { 233 if heavyerr, ok := syncerr.(*reply.HeavyError); ok { 234 shouldretry = heavyerr.IsRetryable() 235 } 236 237 syncerr = errors.Wrap(syncerr, "HeavySync failed") 238 inslog.Errorf("%v (on attempt=%v, shouldretry=%v)", 239 syncerr.Error(), c.syncbackoff.Attempt(), shouldretry) 240 241 if shouldretry { 242 retrydelay = c.syncbackoff.Duration() 243 continue 244 } 245 // TODO: write some info to dust - 14.Dec.2018 @nordicdyno 246 } else { 247 ctx = insmetrics.InsertTag(ctx, tagJet, c.jetID.DebugString()) 248 stats.Record(ctx, 249 statSyncedPulsesCount.M(1), 250 ) 251 } 252 253 finishpulse() 254 } 255 256 } 257 258 // Stop stops heavy client replication 259 func (c *JetClient) Stop(ctx context.Context) { 260 // cancel should be set if client has started 261 if c.cancel != nil { 262 // two signals for sync loop to stop 263 c.cancel() 264 close(c.signal) 265 // waits sync loop to stop 266 <-c.syncdone 267 } 268 } 269 270 func backoffFromConfig(bconf configuration.Backoff) *backoff.Backoff { 271 return &backoff.Backoff{ 272 Jitter: bconf.Jitter, 273 Min: bconf.Min, 274 Max: bconf.Max, 275 Factor: bconf.Factor, 276 } 277 } 278 279 func isPulseNumberOutdated(ctx context.Context, pulseTracker storage.PulseTracker, pstore core.PulseStorage, pn core.PulseNumber, delta int) bool { 280 current, err := pstore.Current(ctx) 281 if err != nil { 282 panic(err) 283 } 284 285 currentPulse, err := pulseTracker.GetPulse(ctx, current.PulseNumber) 286 if err != nil { 287 panic(err) 288 } 289 290 pnPulse, err := pulseTracker.GetPulse(ctx, pn) 291 if err != nil { 292 inslogger.FromContext(ctx).Errorf("Can't get pulse by pulse number: %v", pn) 293 return true 294 } 295 296 return currentPulse.SerialNumber-delta > pnPulse.SerialNumber 297 }