github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/staging/manager.go (about) 1 package staging 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "math/rand" 8 "path" 9 "strings" 10 "time" 11 12 "github.com/treeverse/lakefs/pkg/batch" 13 "github.com/treeverse/lakefs/pkg/graveler" 14 "github.com/treeverse/lakefs/pkg/kv" 15 "github.com/treeverse/lakefs/pkg/logging" 16 ) 17 18 type Manager struct { 19 kvStore kv.Store 20 kvStoreLimited kv.Store 21 wakeup chan asyncEvent 22 batchExecutor batch.Batcher 23 batchDBIOTransactionMarkers bool 24 25 // cleanupCallback is being called with every successful cleanup cycle 26 cleanupCallback func() 27 } 28 29 // asyncEvent is a type of event to be handled in the async loop 30 type asyncEvent string 31 32 // cleanTokens is async cleaning of deleted staging tokens 33 const ( 34 MaxBatchDelay = 3 * time.Millisecond 35 cleanTokens = asyncEvent("clean_tokens") 36 ) 37 38 func NewManager(ctx context.Context, store, storeLimited kv.Store, batchDBIOTransactionMarkers bool, executor batch.Batcher) *Manager { 39 const wakeupChanCapacity = 100 40 m := &Manager{ 41 kvStore: store, 42 kvStoreLimited: storeLimited, 43 wakeup: make(chan asyncEvent, wakeupChanCapacity), 44 batchExecutor: executor, 45 batchDBIOTransactionMarkers: batchDBIOTransactionMarkers, 46 } 47 go m.asyncLoop(ctx) 48 return m 49 } 50 51 func (m *Manager) log(ctx context.Context) logging.Logger { 52 return logging.FromContext(ctx).WithField("service_name", "staging_manager") 53 } 54 55 func (m *Manager) OnCleanup(cleanupCallback func()) { 56 m.cleanupCallback = cleanupCallback 57 } 58 59 func (m *Manager) getBatchedEntryData(ctx context.Context, st graveler.StagingToken, key graveler.Key) (*graveler.StagedEntryData, error) { 60 batchKey := fmt.Sprintf("StagingGet:%s:%s", st, key) 61 dt, err := m.batchExecutor.BatchFor(ctx, batchKey, MaxBatchDelay, batch.ExecuterFunc(func() (interface{}, error) { 62 dt := &graveler.StagedEntryData{} 63 _, err := kv.GetMsg(ctx, m.kvStore, graveler.StagingTokenPartition(st), key, dt) 64 return dt, err 65 })) 66 if err != nil { 67 return nil, err 68 } 69 return dt.(*graveler.StagedEntryData), nil 70 } 71 72 // isDBIOTransactionalMarkerObject returns true if the key is a DBIO transactional marker object (_started or _committed 73 func isDBIOTransactionalMarkerObject(key graveler.Key) bool { 74 _, f := path.Split(key.String()) 75 return strings.HasPrefix(f, "_started_") || strings.HasPrefix(f, "_committed_") 76 } 77 78 func (m *Manager) Get(ctx context.Context, st graveler.StagingToken, key graveler.Key) (*graveler.Value, error) { 79 var err error 80 data := &graveler.StagedEntryData{} 81 // workaround for batching DBIO markers objects 82 if m.batchDBIOTransactionMarkers && isDBIOTransactionalMarkerObject(key) { 83 data, err = m.getBatchedEntryData(ctx, st, key) 84 } else { 85 _, err = kv.GetMsg(ctx, m.kvStore, graveler.StagingTokenPartition(st), key, data) 86 } 87 88 if err != nil { 89 if errors.Is(err, kv.ErrNotFound) { 90 err = graveler.ErrNotFound 91 } 92 return nil, err 93 } 94 // Tombstone handling 95 if data.Identity == nil { 96 return nil, nil 97 } 98 return graveler.StagedEntryFromProto(data), nil 99 } 100 101 func (m *Manager) Set(ctx context.Context, st graveler.StagingToken, key graveler.Key, value *graveler.Value, requireExists bool) error { 102 // Tombstone handling 103 if value == nil { 104 value = new(graveler.Value) 105 } else if value.Identity == nil { 106 return graveler.ErrInvalidValue 107 } 108 109 pb := graveler.ProtoFromStagedEntry(key, value) 110 stPartition := graveler.StagingTokenPartition(st) 111 if requireExists { 112 return kv.SetMsgIf(ctx, m.kvStore, stPartition, key, pb, kv.PrecondConditionalExists) 113 } 114 return kv.SetMsg(ctx, m.kvStore, stPartition, key, pb) 115 } 116 117 func (m *Manager) Update(ctx context.Context, st graveler.StagingToken, key graveler.Key, updateFunc graveler.ValueUpdateFunc) error { 118 oldValueProto := &graveler.StagedEntryData{} 119 var oldValue *graveler.Value 120 pred, err := kv.GetMsg(ctx, m.kvStore, graveler.StagingTokenPartition(st), key, oldValueProto) 121 if err != nil { 122 if errors.Is(err, kv.ErrNotFound) { 123 oldValue = nil 124 } else { 125 return err 126 } 127 } else { 128 oldValue = graveler.StagedEntryFromProto(oldValueProto) 129 } 130 updatedValue, err := updateFunc(oldValue) 131 if err != nil { 132 // report error or skip if ErrSkipValueUpdate 133 if errors.Is(err, graveler.ErrSkipValueUpdate) { 134 return nil 135 } 136 return err 137 } 138 return kv.SetMsgIf(ctx, m.kvStore, graveler.StagingTokenPartition(st), key, graveler.ProtoFromStagedEntry(key, updatedValue), pred) 139 } 140 141 func (m *Manager) DropKey(ctx context.Context, st graveler.StagingToken, key graveler.Key) error { 142 return m.kvStore.Delete(ctx, []byte(graveler.StagingTokenPartition(st)), key) 143 } 144 145 // List returns an iterator of staged values on the staging token st 146 func (m *Manager) List(ctx context.Context, st graveler.StagingToken, batchSize int) graveler.ValueIterator { 147 return NewStagingIterator(ctx, m.kvStore, st, batchSize) 148 } 149 150 func (m *Manager) Drop(ctx context.Context, st graveler.StagingToken) error { 151 return m.DropByPrefix(ctx, st, []byte("")) 152 } 153 154 func (m *Manager) DropAsync(ctx context.Context, st graveler.StagingToken) error { 155 err := m.kvStore.Set(ctx, []byte(graveler.CleanupTokensPartition()), []byte(st), []byte("stub-value")) 156 select { 157 case m.wakeup <- cleanTokens: 158 default: 159 m.log(ctx).Debug("wakeup channel is full, skipping wakeup") 160 } 161 return err 162 } 163 164 func (m *Manager) DropByPrefix(ctx context.Context, st graveler.StagingToken, prefix graveler.Key) error { 165 return m.dropByPrefix(ctx, m.kvStore, st, prefix) 166 } 167 168 func (m *Manager) dropByPrefix(ctx context.Context, store kv.Store, st graveler.StagingToken, prefix graveler.Key) error { 169 itr, err := kv.ScanPrefix(ctx, store, []byte(graveler.StagingTokenPartition(st)), prefix, []byte("")) 170 if err != nil { 171 return err 172 } 173 defer itr.Close() 174 for itr.Next() { 175 err = store.Delete(ctx, []byte(graveler.StagingTokenPartition(st)), itr.Entry().Key) 176 if err != nil { 177 return err 178 } 179 } 180 181 return nil 182 } 183 184 func (m *Manager) asyncLoop(ctx context.Context) { 185 for { 186 select { 187 case <-ctx.Done(): 188 return 189 case event := <-m.wakeup: 190 switch event { 191 case cleanTokens: 192 err := m.findAndDrop(ctx) 193 if err != nil { 194 m.log(ctx).WithError(err).Error("Dropping tokens failed") 195 } else if m.cleanupCallback != nil { 196 m.cleanupCallback() 197 } 198 default: 199 panic(fmt.Sprintf("unknown event: %s", event)) 200 } 201 } 202 } 203 } 204 205 // findAndDrop lookup for staging tokens to delete and drop keys by prefix. Uses store limited to rate limit the access. 206 // it assumes we are processing the data in the background. 207 func (m *Manager) findAndDrop(ctx context.Context) error { 208 const maxTokensToFetch = 512 209 it, err := m.kvStoreLimited.Scan(ctx, []byte(graveler.CleanupTokensPartition()), kv.ScanOptions{BatchSize: maxTokensToFetch}) 210 if err != nil { 211 return err 212 } 213 defer it.Close() 214 215 // Collecting all the tokens so we can shuffle them. 216 // Shuffling reduces the chances of 2 servers working on the same staging token 217 var stagingTokens [][]byte 218 for len(stagingTokens) < maxTokensToFetch && it.Next() { 219 stagingTokens = append(stagingTokens, it.Entry().Key) 220 } 221 rand.Shuffle(len(stagingTokens), func(i, j int) { 222 stagingTokens[i], stagingTokens[j] = stagingTokens[j], stagingTokens[i] 223 }) 224 225 for _, token := range stagingTokens { 226 if err := m.dropByPrefix(ctx, m.kvStoreLimited, graveler.StagingToken(token), []byte("")); err != nil { 227 return err 228 } 229 if err := m.kvStoreLimited.Delete(ctx, []byte(graveler.CleanupTokensPartition()), token); err != nil { 230 return err 231 } 232 } 233 234 return it.Err() 235 }