github.com/weaviate/weaviate@v1.24.6/usecases/backup/shard.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package backup
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"sync"
    18  	"sync/atomic"
    19  	"time"
    20  
    21  	"github.com/sirupsen/logrus"
    22  	enterrors "github.com/weaviate/weaviate/entities/errors"
    23  
    24  	"github.com/weaviate/weaviate/entities/backup"
    25  )
    26  
    27  const (
    28  	_TimeoutShardCommit = 20 * time.Second
    29  )
    30  
    31  type reqStat struct {
    32  	Starttime time.Time
    33  	ID        string
    34  	Status    backup.Status
    35  	Path      string
    36  }
    37  
    38  type backupStat struct {
    39  	sync.Mutex
    40  	reqStat
    41  }
    42  
    43  func (s *backupStat) get() reqStat {
    44  	s.Lock()
    45  	defer s.Unlock()
    46  	return s.reqStat
    47  }
    48  
    49  // renew state if and only it is not in use
    50  // it returns "" in case of success and current id in case of failure
    51  func (s *backupStat) renew(id string, path string) string {
    52  	s.Lock()
    53  	defer s.Unlock()
    54  	if s.reqStat.ID != "" {
    55  		return s.reqStat.ID
    56  	}
    57  	s.reqStat.ID = id
    58  	s.reqStat.Path = path
    59  	s.reqStat.Starttime = time.Now().UTC()
    60  	s.reqStat.Status = backup.Started
    61  	return ""
    62  }
    63  
    64  func (s *backupStat) reset() {
    65  	s.Lock()
    66  	s.reqStat.ID = ""
    67  	s.reqStat.Path = ""
    68  	s.reqStat.Status = ""
    69  	s.Unlock()
    70  }
    71  
    72  func (s *backupStat) set(st backup.Status) {
    73  	s.Lock()
    74  	s.reqStat.Status = st
    75  	s.Unlock()
    76  }
    77  
    78  // shardSyncChan makes sure that a backup operation is mutually exclusive.
    79  // It also contains the channel used to communicate with the coordinator.
    80  type shardSyncChan struct {
    81  	// lastOp makes sure backup operations are mutually exclusive
    82  	lastOp backupStat
    83  
    84  	// waitingForCoordinatorToCommit use while waiting for the coordinator to take the next action
    85  	waitingForCoordinatorToCommit atomic.Bool
    86  	//  coordChan used to communicate with the coordinator
    87  	coordChan chan interface{}
    88  
    89  	// lastAsyncError used for debugging when no metadata is created
    90  	lastAsyncError error
    91  }
    92  
    93  // waitForCoordinator to confirm or to abort previous operation
    94  func (c *shardSyncChan) waitForCoordinator(d time.Duration, id string) error {
    95  	defer c.waitingForCoordinatorToCommit.Store(false)
    96  	if d == 0 {
    97  		return nil
    98  	}
    99  
   100  	timer := time.NewTimer(d)
   101  	defer timer.Stop()
   102  	for {
   103  		select {
   104  		case <-timer.C:
   105  			return fmt.Errorf("timed out waiting for coordinator to commit")
   106  		case v := <-c.coordChan:
   107  			switch v := v.(type) {
   108  			case AbortRequest:
   109  				if v.ID == id {
   110  					return fmt.Errorf("coordinator aborted operation")
   111  				}
   112  			case StatusRequest:
   113  				if v.ID == id {
   114  					return nil
   115  				}
   116  			}
   117  		}
   118  	}
   119  }
   120  
   121  // withCancellation return a new context which will be cancelled if the coordinator
   122  // want to abort the commit phase
   123  func (c *shardSyncChan) withCancellation(ctx context.Context, id string, done chan struct{}, logger logrus.FieldLogger) context.Context {
   124  	ctx, cancel := context.WithCancel(ctx)
   125  	enterrors.GoWrapper(func() {
   126  		defer cancel()
   127  		for {
   128  			select {
   129  			case v := <-c.coordChan:
   130  				switch v := v.(type) {
   131  				case AbortRequest:
   132  					if v.ID == id {
   133  						return
   134  					}
   135  				}
   136  			case <-done: // caller is done
   137  				return
   138  			}
   139  		}
   140  	}, logger)
   141  	return ctx
   142  }
   143  
   144  // OnCommit will be triggered when the coordinator confirms the execution of a previous operation
   145  func (c *shardSyncChan) OnCommit(ctx context.Context, req *StatusRequest) error {
   146  	st := c.lastOp.get()
   147  	if st.ID == req.ID && c.waitingForCoordinatorToCommit.Load() {
   148  		c.coordChan <- *req
   149  		return nil
   150  	}
   151  	return fmt.Errorf("shard has abandon backup operation")
   152  }
   153  
   154  // Abort tells a node to abort the previous backup operation
   155  func (c *shardSyncChan) OnAbort(_ context.Context, req *AbortRequest) error {
   156  	st := c.lastOp.get()
   157  	if st.ID == req.ID {
   158  		c.coordChan <- *req
   159  		return nil
   160  	}
   161  	return nil
   162  }