github.com/braveheart12/insolar-09-08-19@v0.8.7/ledger/heavyclient/heavy_client.go (about)

     1  /*
     2   *    Copyright 2019 Insolar Technologies
     3   *
     4   *    Licensed under the Apache License, Version 2.0 (the "License");
     5   *    you may not use this file except in compliance with the License.
     6   *    You may obtain a copy of the License at
     7   *
     8   *        http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   *    Unless required by applicable law or agreed to in writing, software
    11   *    distributed under the License is distributed on an "AS IS" BASIS,
    12   *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   *    See the License for the specific language governing permissions and
    14   *    limitations under the License.
    15   */
    16  
    17  package heavyclient
    18  
    19  import (
    20  	"context"
    21  	"sync"
    22  	"time"
    23  
    24  	"github.com/insolar/insolar/configuration"
    25  	"github.com/insolar/insolar/core"
    26  	"github.com/insolar/insolar/core/reply"
    27  	"github.com/insolar/insolar/instrumentation/inslogger"
    28  	"github.com/insolar/insolar/instrumentation/insmetrics"
    29  	"github.com/insolar/insolar/ledger/storage"
    30  	"github.com/insolar/insolar/utils/backoff"
    31  	"github.com/pkg/errors"
    32  	"go.opencensus.io/stats"
    33  )
    34  
    35  // Options contains heavy client configuration params.
    36  type Options struct {
    37  	SyncMessageLimit int
    38  	PulsesDeltaLimit int
    39  	BackoffConf      configuration.Backoff
    40  }
    41  
    42  // JetClient heavy replication client. Replicates records for one jet.
    43  type JetClient struct {
    44  	bus            core.MessageBus
    45  	pulseStorage   core.PulseStorage
    46  	replicaStorage storage.ReplicaStorage
    47  	pulseTracker   storage.PulseTracker
    48  	cleaner        storage.Cleaner
    49  	db             storage.DBContext
    50  
    51  	opts Options
    52  
    53  	// life cycle control
    54  	//
    55  	startOnce sync.Once
    56  	cancel    context.CancelFunc
    57  	signal    chan struct{}
    58  	// syncdone closes when syncloop is gracefully finished
    59  	syncdone chan struct{}
    60  
    61  	// state:
    62  	jetID       core.RecordID
    63  	muPulses    sync.Mutex
    64  	leftPulses  []core.PulseNumber
    65  	syncbackoff *backoff.Backoff
    66  }
    67  
    68  // NewJetClient heavy replication client constructor.
    69  //
    70  // First argument defines what jet it serve.
    71  func NewJetClient(
    72  	replicaStorage storage.ReplicaStorage,
    73  	mb core.MessageBus,
    74  	pulseStorage core.PulseStorage,
    75  	pulseTracker storage.PulseTracker,
    76  	cleaner storage.Cleaner,
    77  	db storage.DBContext,
    78  	jetID core.RecordID,
    79  	opts Options,
    80  ) *JetClient {
    81  	jsc := &JetClient{
    82  		bus:            mb,
    83  		pulseStorage:   pulseStorage,
    84  		replicaStorage: replicaStorage,
    85  		pulseTracker:   pulseTracker,
    86  		cleaner:        cleaner,
    87  		db:             db,
    88  		jetID:          jetID,
    89  		syncbackoff:    backoffFromConfig(opts.BackoffConf),
    90  		signal:         make(chan struct{}, 1),
    91  		syncdone:       make(chan struct{}),
    92  		opts:           opts,
    93  	}
    94  	return jsc
    95  }
    96  
    97  // should be called from protected by mutex code
    98  func (c *JetClient) updateLeftPulsesMetrics(ctx context.Context) {
    99  	// instrumentation
   100  	var pn core.PulseNumber
   101  	if len(c.leftPulses) > 0 {
   102  		pn = c.leftPulses[0]
   103  	}
   104  	ctx = insmetrics.InsertTag(ctx, tagJet, c.jetID.DebugString())
   105  	stats.Record(ctx,
   106  		statUnsyncedPulsesCount.M(int64(len(c.leftPulses))),
   107  		statFirstUnsyncedPulse.M(int64(pn)),
   108  	)
   109  }
   110  
   111  // addPulses add pulse numbers for syncing.
   112  func (c *JetClient) addPulses(ctx context.Context, pns []core.PulseNumber) {
   113  	c.muPulses.Lock()
   114  	c.leftPulses = append(c.leftPulses, pns...)
   115  
   116  	if err := c.replicaStorage.SetSyncClientJetPulses(ctx, c.jetID, c.leftPulses); err != nil {
   117  		inslogger.FromContext(ctx).Errorf(
   118  			"attempt to persist jet sync state failed: jetID=%v: %v", c.jetID, err.Error())
   119  	}
   120  
   121  	c.updateLeftPulsesMetrics(ctx)
   122  	c.muPulses.Unlock()
   123  }
   124  
   125  func (c *JetClient) pulsesLeft() int {
   126  	c.muPulses.Lock()
   127  	defer c.muPulses.Unlock()
   128  	return len(c.leftPulses)
   129  }
   130  
   131  // unshiftPulse removes and returns pulse number from head of processing queue.
   132  func (c *JetClient) unshiftPulse(ctx context.Context) *core.PulseNumber {
   133  	c.muPulses.Lock()
   134  	defer c.muPulses.Unlock()
   135  
   136  	if len(c.leftPulses) == 0 {
   137  		return nil
   138  	}
   139  	result := c.leftPulses[0]
   140  
   141  	// shift array elements on one position to left
   142  	shifted := c.leftPulses[:len(c.leftPulses)-1]
   143  	copy(shifted, c.leftPulses[1:])
   144  	c.leftPulses = shifted
   145  
   146  	if err := c.replicaStorage.SetSyncClientJetPulses(ctx, c.jetID, c.leftPulses); err != nil {
   147  		inslogger.FromContext(ctx).Errorf(
   148  			"attempt to persist jet sync state failed: jetID=%v: %v", c.jetID, err.Error())
   149  	}
   150  
   151  	c.updateLeftPulsesMetrics(ctx)
   152  	return &result
   153  }
   154  
   155  func (c *JetClient) nextPulseNumber() (core.PulseNumber, bool) {
   156  	c.muPulses.Lock()
   157  	defer c.muPulses.Unlock()
   158  
   159  	if len(c.leftPulses) == 0 {
   160  		return 0, false
   161  	}
   162  	return c.leftPulses[0], true
   163  }
   164  
   165  func (c *JetClient) runOnce(ctx context.Context) {
   166  	// retrydelay = m.syncbackoff.ForAttempt(attempt)
   167  	c.startOnce.Do(func() {
   168  		// TODO: reset TraceID from context, or just don't use context?
   169  		// (TraceID not meaningful in async sync loop)
   170  		ctx, cancel := context.WithCancel(context.Background())
   171  		c.cancel = cancel
   172  		go c.syncloop(ctx)
   173  	})
   174  }
   175  
   176  func (c *JetClient) syncloop(ctx context.Context) {
   177  	inslog := inslogger.FromContext(ctx)
   178  	defer close(c.syncdone)
   179  
   180  	var (
   181  		syncPN     core.PulseNumber
   182  		hasNext    bool
   183  		retrydelay time.Duration
   184  	)
   185  
   186  	finishpulse := func() {
   187  		_ = c.unshiftPulse(ctx)
   188  		c.syncbackoff.Reset()
   189  		retrydelay = 0
   190  	}
   191  
   192  	for {
   193  		select {
   194  		case <-time.After(retrydelay):
   195  			// for first try delay should be zero
   196  		case <-ctx.Done():
   197  			if c.pulsesLeft() == 0 {
   198  				// got cancel signal and have nothing to do
   199  				return
   200  			}
   201  			// client in canceled state signal but has smth to do
   202  		}
   203  
   204  		for {
   205  			// if we have pulses to sync, process it
   206  			syncPN, hasNext = c.nextPulseNumber()
   207  			if hasNext {
   208  				inslog.Debugf("synchronization next sync pulse num: %v (left=%v)", syncPN, c.leftPulses)
   209  				break
   210  			}
   211  
   212  			inslog.Debug("synchronization waiting signal what new pulse happens")
   213  			_, ok := <-c.signal
   214  			if !ok {
   215  				inslog.Info("stop is called, so we are should just stop syncronization loop")
   216  				return
   217  			}
   218  		}
   219  
   220  		if isPulseNumberOutdated(ctx, c.pulseTracker, c.pulseStorage, syncPN, c.opts.PulsesDeltaLimit) {
   221  			inslog.Infof("pulse %v on jet %v is outdated, skip it", syncPN, c.jetID)
   222  			finishpulse()
   223  			continue
   224  		}
   225  
   226  		inslog.Infof("start synchronization to heavy for pulse %v", syncPN)
   227  
   228  		shouldretry := false
   229  		isretry := c.syncbackoff.Attempt() > 0
   230  
   231  		syncerr := c.HeavySync(ctx, syncPN, isretry)
   232  		if syncerr != nil {
   233  			if heavyerr, ok := syncerr.(*reply.HeavyError); ok {
   234  				shouldretry = heavyerr.IsRetryable()
   235  			}
   236  
   237  			syncerr = errors.Wrap(syncerr, "HeavySync failed")
   238  			inslog.Errorf("%v (on attempt=%v, shouldretry=%v)",
   239  				syncerr.Error(), c.syncbackoff.Attempt(), shouldretry)
   240  
   241  			if shouldretry {
   242  				retrydelay = c.syncbackoff.Duration()
   243  				continue
   244  			}
   245  			// TODO: write some info to dust - 14.Dec.2018 @nordicdyno
   246  		} else {
   247  			ctx = insmetrics.InsertTag(ctx, tagJet, c.jetID.DebugString())
   248  			stats.Record(ctx,
   249  				statSyncedPulsesCount.M(1),
   250  			)
   251  		}
   252  
   253  		finishpulse()
   254  	}
   255  
   256  }
   257  
   258  // Stop stops heavy client replication
   259  func (c *JetClient) Stop(ctx context.Context) {
   260  	// cancel should be set if client has started
   261  	if c.cancel != nil {
   262  		// two signals for sync loop to stop
   263  		c.cancel()
   264  		close(c.signal)
   265  		// waits sync loop to stop
   266  		<-c.syncdone
   267  	}
   268  }
   269  
   270  func backoffFromConfig(bconf configuration.Backoff) *backoff.Backoff {
   271  	return &backoff.Backoff{
   272  		Jitter: bconf.Jitter,
   273  		Min:    bconf.Min,
   274  		Max:    bconf.Max,
   275  		Factor: bconf.Factor,
   276  	}
   277  }
   278  
   279  func isPulseNumberOutdated(ctx context.Context, pulseTracker storage.PulseTracker, pstore core.PulseStorage, pn core.PulseNumber, delta int) bool {
   280  	current, err := pstore.Current(ctx)
   281  	if err != nil {
   282  		panic(err)
   283  	}
   284  
   285  	currentPulse, err := pulseTracker.GetPulse(ctx, current.PulseNumber)
   286  	if err != nil {
   287  		panic(err)
   288  	}
   289  
   290  	pnPulse, err := pulseTracker.GetPulse(ctx, pn)
   291  	if err != nil {
   292  		inslogger.FromContext(ctx).Errorf("Can't get pulse by pulse number: %v", pn)
   293  		return true
   294  	}
   295  
   296  	return currentPulse.SerialNumber-delta > pnPulse.SerialNumber
   297  }