github.com/0chain/gosdk@v1.17.11/zboxcore/sdk/writemarker_mutex.go (about)

     1  package sdk
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io"
     8  	"net/http"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/0chain/errors"
    13  	"github.com/0chain/gosdk/constants"
    14  	"github.com/0chain/gosdk/zboxcore/blockchain"
    15  	"github.com/0chain/gosdk/zboxcore/client"
    16  	"github.com/0chain/gosdk/zboxcore/logger"
    17  	"github.com/0chain/gosdk/zboxcore/zboxutil"
    18  )
    19  
// WMLockStatus is the status value carried in a blobber's response to a
// write-marker lock request (see WMLockResult).
type WMLockStatus int

const (
	// WMLockStatusFailed is the zero value. lockBlobber treats any status
	// that is neither pending nor OK as a failed lock attempt.
	WMLockStatusFailed WMLockStatus = iota
	// WMLockStatusPending makes lockBlobber wait WMLockWaitTime and send the
	// lock request again.
	WMLockStatusPending
	// WMLockStatusOK makes lockBlobber count the blobber towards consensus.
	WMLockStatusOK
)
// WMLockWaitTime is the pause between two lock requests to the same blobber
// while that blobber keeps answering with WMLockStatusPending.
const WMLockWaitTime = 2 * time.Second
    28  
// WMLockResult is the JSON document lockBlobber decodes from the body of a
// 200 response to a write-marker lock request.
type WMLockResult struct {
	// Status tells whether the lock was granted, is pending, or failed.
	Status    WMLockStatus `json:"status,omitempty"`
	// CreatedAt is not read anywhere in this file.
	// NOTE(review): presumably the lock creation time reported by the
	// blobber; unit not visible here, confirm against the blobber API.
	CreatedAt int64        `json:"created_at,omitempty"`
}
    33  
// WriteMarkerMutex is the client side of the blobbers' write-marker lock for
// one allocation. Create it with CreateWriteMarkerMutex so that
// lockedBlobbers is populated.
type WriteMarkerMutex struct {
	// mutex serializes calls to Lock on this instance.
	mutex            sync.Mutex
	// allocationObj supplies the allocation ID, transaction and signature
	// used to build lock and unlock requests.
	allocationObj    *Allocation
	// lockedBlobbers maps a blobber ID to a channel of capacity one.
	// lockBlobber and UnlockBlobber send on it before talking to the blobber,
	// so at most one of them proceeds per blobber at a time. lockBlobber
	// receives from it again when it returns; UnlockBlobber does not.
	lockedBlobbers   map[string]chan struct{}
	// leadBlobberIndex is the index, into the blobbers slice passed to Lock,
	// of the blobber that is locked first and unlocked last. Lock advances it
	// past blobbers that could not be locked.
	leadBlobberIndex int
}
    41  
    42  // CreateWriteMarkerMutex create WriteMarkerMutex for allocation
    43  func CreateWriteMarkerMutex(client *client.Client, allocationObj *Allocation) (*WriteMarkerMutex, error) {
    44  	if allocationObj == nil {
    45  		return nil, errors.Throw(constants.ErrInvalidParameter, "allocationObj")
    46  	}
    47  
    48  	lockedBlobbers := make(map[string]chan struct{})
    49  	for _, b := range allocationObj.Blobbers {
    50  		if b.ID == "" {
    51  			logger.Logger.Error(b.Baseurl, "blobber ID is empty string")
    52  			return nil, errors.Throw(constants.ErrInvalidParameter, "blobber ID cannot be an empty string")
    53  		}
    54  		lockedBlobbers[b.ID] = make(chan struct{}, 1)
    55  	}
    56  
    57  	return &WriteMarkerMutex{
    58  		allocationObj:    allocationObj,
    59  		lockedBlobbers:   lockedBlobbers,
    60  		leadBlobberIndex: 0,
    61  	}, nil
    62  }
    63  
    64  func (wmMu *WriteMarkerMutex) Unlock(
    65  	ctx context.Context, mask zboxutil.Uint128,
    66  	blobbers []*blockchain.StorageNode,
    67  	timeOut time.Duration, connID string,
    68  ) {
    69  	wg := &sync.WaitGroup{}
    70  	var pos uint64
    71  	for i := mask; !i.Equals64(0); i = i.And(zboxutil.NewUint128(1).Lsh(pos).Not()) {
    72  		pos = uint64(i.TrailingZeros())
    73  		if pos == uint64(wmMu.leadBlobberIndex) { // Skip lead blobber
    74  			continue
    75  		}
    76  		blobber := blobbers[pos]
    77  		wg.Add(1)
    78  		go wmMu.UnlockBlobber(ctx, blobber, connID, timeOut, wg)
    79  	}
    80  	wg.Wait()
    81  
    82  	// Now unlock lead blobber
    83  	wg.Add(1)
    84  	go wmMu.UnlockBlobber(ctx, blobbers[uint64(wmMu.leadBlobberIndex)], connID, timeOut, wg)
    85  	wg.Wait()
    86  }
    87  
    88  // Change status code to 204
    89  func (wmMu *WriteMarkerMutex) UnlockBlobber(
    90  	ctx context.Context, b *blockchain.StorageNode,
    91  	connID string, timeOut time.Duration, wg *sync.WaitGroup,
    92  ) {
    93  	defer wg.Done()
    94  	wmMu.lockedBlobbers[b.ID] <- struct{}{}
    95  	var err error
    96  	defer func() {
    97  		if err != nil {
    98  			logger.Logger.Error(err)
    99  		}
   100  	}()
   101  
   102  	var req *http.Request
   103  	req, err = zboxutil.NewWriteMarkerUnLockRequest(
   104  		b.Baseurl, wmMu.allocationObj.ID, wmMu.allocationObj.Tx, wmMu.allocationObj.sig, connID, "")
   105  	if err != nil {
   106  		return
   107  	}
   108  
   109  	var resp *http.Response
   110  	var shouldContinue bool
   111  	for retry := 0; retry < 3; retry++ {
   112  		err, shouldContinue = func() (err error, shouldContinue bool) {
   113  			reqCtx, ctxCncl := context.WithTimeout(ctx, timeOut)
   114  			resp, err = zboxutil.Client.Do(req.WithContext(reqCtx))
   115  			ctxCncl()
   116  
   117  			if err != nil {
   118  				return
   119  			}
   120  			if resp.Body != nil {
   121  				defer resp.Body.Close()
   122  			}
   123  			var (
   124  				msg  string
   125  				data []byte
   126  			)
   127  			if resp.StatusCode == http.StatusNoContent || resp.StatusCode == http.StatusOK {
   128  				logger.Logger.Info(b.Baseurl, connID, " unlocked")
   129  				return
   130  			}
   131  			if resp.StatusCode == http.StatusTooManyRequests {
   132  				logger.Logger.Info(b.Baseurl, connID, " got too many request error. Retrying")
   133  				var r int
   134  				r, err = zboxutil.GetRateLimitValue(resp)
   135  				if err != nil {
   136  					logger.Logger.Error(err)
   137  					return
   138  				}
   139  				time.Sleep(time.Duration(r) * time.Second)
   140  				shouldContinue = true
   141  				return
   142  			}
   143  
   144  			data, err = io.ReadAll(resp.Body)
   145  			if err != nil {
   146  				logger.Logger.Error(err)
   147  				return
   148  			}
   149  
   150  			msg = string(data)
   151  			if msg == "EOF" {
   152  				logger.Logger.Debug(b.Baseurl, connID, " retrying request because "+
   153  					"server closed connection unexpectedly")
   154  				shouldContinue = true
   155  				return
   156  			}
   157  
   158  			err = errors.New("unknown_status",
   159  				fmt.Sprintf("Blobber %s responded with status %d and message %s",
   160  					b.Baseurl, resp.StatusCode, string(data)))
   161  
   162  			return
   163  		}()
   164  
   165  		if err != nil {
   166  			return
   167  		}
   168  		if shouldContinue {
   169  			continue
   170  		}
   171  		return
   172  	}
   173  }
   174  
   175  func (wmMu *WriteMarkerMutex) Lock(
   176  	ctx context.Context, mask *zboxutil.Uint128,
   177  	maskMu *sync.Mutex, blobbers []*blockchain.StorageNode,
   178  	consensus *Consensus, addConsensus int, timeOut time.Duration, connID string) error {
   179  
   180  	wmMu.mutex.Lock()
   181  	defer wmMu.mutex.Unlock()
   182  
   183  	consensus.Reset()
   184  	consensus.consensus = addConsensus
   185  
   186  	wg := &sync.WaitGroup{}
   187  
   188  	// Lock first responsive blobber as lead blobber
   189  	for ; wmMu.leadBlobberIndex < len(blobbers); wmMu.leadBlobberIndex++ {
   190  		methodCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
   191  		defer cancel()
   192  		leadBlobber := blobbers[uint64(wmMu.leadBlobberIndex)]
   193  		wg.Add(1)
   194  		go wmMu.lockBlobber(methodCtx, mask, maskMu, consensus, leadBlobber, uint64(wmMu.leadBlobberIndex), connID, timeOut, wg)
   195  		wg.Wait()
   196  		if consensus.getConsensus()-addConsensus == 1 {
   197  			break
   198  		}
   199  		select {
   200  		case <-methodCtx.Done():
   201  			logger.Logger.Error("Locking blobber: ", leadBlobber.Baseurl, " context timeout exceeded")
   202  			return errors.New("lock_timeout", "Locking blobber: "+leadBlobber.Baseurl+" context timeout exceeded")
   203  		default:
   204  		}
   205  	}
   206  
   207  	if consensus.getConsensus()-addConsensus != 1 {
   208  		return errors.New("lock_consensus_not_met", "Failed to lock the lead blobber after retries")
   209  	}
   210  
   211  	// Once the lead blobber is locked successfully, lock the other blobbers
   212  	var pos uint64
   213  	for i := *mask; !i.Equals64(0); i = i.And(zboxutil.NewUint128(1).Lsh(pos).Not()) {
   214  		pos = uint64(i.TrailingZeros())
   215  		if pos == uint64(wmMu.leadBlobberIndex) {
   216  			continue
   217  		}
   218  		blobber := blobbers[pos]
   219  		wg.Add(1)
   220  		go wmMu.lockBlobber(ctx, mask, maskMu, consensus, blobber, pos, connID, timeOut, wg)
   221  	}
   222  	wg.Wait()
   223  	if !consensus.isConsensusOk() {
   224  		wmMu.Unlock(ctx, *mask, blobbers, timeOut, connID)
   225  		return errors.New("lock_consensus_not_met",
   226  			fmt.Sprintf("Required consensus %d got %d",
   227  				consensus.consensusThresh, consensus.getConsensus()))
   228  	}
   229  
   230  	/* This goroutine will refresh lock after 30 seconds have passed. It will only complete if context is
   231  	   completed, that is why, the caller should make proper use of context and cancel it when work is done. */
   232  	go func() {
   233  		for {
   234  			<-time.NewTimer(30 * time.Second).C
   235  			select {
   236  			case <-ctx.Done():
   237  				return
   238  			default:
   239  			}
   240  
   241  			wg := &sync.WaitGroup{}
   242  			cons := &Consensus{RWMutex: &sync.RWMutex{}}
   243  			for i := *mask; !i.Equals64(0); i = i.And(zboxutil.NewUint128(1).Lsh(pos).Not()) {
   244  				pos = uint64(i.TrailingZeros())
   245  				blobber := blobbers[pos]
   246  				wg.Add(1)
   247  				go wmMu.lockBlobber(ctx, mask, maskMu, cons, blobber, pos, connID, timeOut, wg)
   248  			}
   249  			wg.Wait()
   250  		}
   251  	}()
   252  
   253  	return nil
   254  }
   255  
   256  func (wmMu *WriteMarkerMutex) lockBlobber(
   257  	ctx context.Context, mask *zboxutil.Uint128, maskMu *sync.Mutex,
   258  	consensus *Consensus, b *blockchain.StorageNode, pos uint64, connID string,
   259  	timeOut time.Duration, wg *sync.WaitGroup) {
   260  	defer wg.Done()
   261  
   262  	select {
   263  	case <-ctx.Done():
   264  		return
   265  	default:
   266  	}
   267  
   268  	wmMu.lockedBlobbers[b.ID] <- struct{}{}
   269  	defer func() {
   270  		<-wmMu.lockedBlobbers[b.ID]
   271  	}()
   272  
   273  	var err error
   274  	defer func() {
   275  		if err != nil {
   276  			logger.Logger.Error(err)
   277  			maskMu.Lock()
   278  			*mask = mask.And(zboxutil.NewUint128(1).Lsh(pos).Not())
   279  			maskMu.Unlock()
   280  		}
   281  	}()
   282  
   283  	var req *http.Request
   284  	req, err = zboxutil.NewWriteMarkerLockRequest(
   285  		b.Baseurl, wmMu.allocationObj.ID, wmMu.allocationObj.Tx, wmMu.allocationObj.sig, connID)
   286  	if err != nil {
   287  		return
   288  	}
   289  
   290  	var resp *http.Response
   291  	var shouldContinue bool
   292  	for retry := 0; retry < 3; retry++ {
   293  		select {
   294  		case <-ctx.Done():
   295  			return
   296  		default:
   297  		}
   298  		err, shouldContinue = func() (err error, shouldContinue bool) {
   299  			reqCtx, ctxCncl := context.WithTimeout(ctx, timeOut)
   300  			defer ctxCncl()
   301  			resp, err = zboxutil.Client.Do(req.WithContext(reqCtx))
   302  			if err != nil {
   303  				return
   304  			}
   305  			if resp.Body != nil {
   306  				defer resp.Body.Close()
   307  			}
   308  
   309  			var data []byte
   310  			if resp.StatusCode == http.StatusOK {
   311  				data, err = io.ReadAll(resp.Body)
   312  				if err != nil {
   313  					return
   314  				}
   315  				wmLockRes := &WMLockResult{}
   316  				err = json.Unmarshal(data, wmLockRes)
   317  				if err != nil {
   318  					return
   319  				}
   320  				if wmLockRes.Status == WMLockStatusOK {
   321  					consensus.Done()
   322  					logger.Logger.Info(b.Baseurl, connID, " locked")
   323  					return
   324  				}
   325  
   326  				if wmLockRes.Status == WMLockStatusPending {
   327  					logger.Logger.Info("Lock pending for blobber ",
   328  						b.Baseurl, "with connection id: ", connID, " Retrying again")
   329  					time.Sleep(WMLockWaitTime)
   330  					shouldContinue = true
   331  					retry--
   332  					return
   333  				}
   334  				err = fmt.Errorf("Lock acquiring failed")
   335  				return
   336  			}
   337  
   338  			if resp.StatusCode == http.StatusTooManyRequests {
   339  				logger.Logger.Info(
   340  					b.Baseurl, connID,
   341  					" got too many request error. Retrying")
   342  
   343  				var r int
   344  				r, err = zboxutil.GetRateLimitValue(resp)
   345  				if err != nil {
   346  					logger.Logger.Error(err)
   347  					return
   348  				}
   349  
   350  				time.Sleep(time.Duration(r) * time.Second)
   351  				shouldContinue = true
   352  				return
   353  			}
   354  
   355  			data, err = io.ReadAll(resp.Body)
   356  			if err != nil {
   357  				logger.Logger.Error(err)
   358  				return
   359  			}
   360  
   361  			err = errors.New("unknown_status",
   362  				fmt.Sprintf("Blobber %s responded with status %d and message %s",
   363  					b.Baseurl, resp.StatusCode, string(data)))
   364  			return
   365  		}()
   366  		if err != nil {
   367  			return
   368  		}
   369  		if !shouldContinue {
   370  			break
   371  		}
   372  	}
   373  }