github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/migration/reserveRepair.go

// Copyright 2023 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package migration

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"runtime"
	"sync"
	"sync/atomic"

	"github.com/ethersphere/bee/v2/pkg/log"
	"github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/storer/internal/chunkstamp"
	"github.com/ethersphere/bee/v2/pkg/storer/internal/reserve"
	"github.com/ethersphere/bee/v2/pkg/storer/internal/transaction"
	"github.com/ethersphere/bee/v2/pkg/swarm"
	"golang.org/x/sync/errgroup"
)

// ReserveRepairer is a migration step that removes all BinItem entries and migrates
// ChunkBinItem and BatchRadiusItem entries to use a new BinID field.
func ReserveRepairer(
	st transaction.Storage,
	chunkTypeFunc func(swarm.Chunk) swarm.ChunkType,
	logger log.Logger,
) func() error {
	return func() error {
		/*
			STEP 0: remove epoch item
			STEP 1: remove all of the BinItem entries
			STEP 2: remove all of the ChunkBinItem entries
			STEP 3: iterate BatchRadiusItem, get new binID
				create new ChunkBinItem and BatchRadiusItem if the chunk exists in the chunkstore
				if the chunk is invalid, it is removed from the chunkstore
			STEP 4: save the latest binID to disk
		*/

		logger.Info("starting reserve repair tool, do not interrupt or kill the process...")

		checkBinIDs := func() error {
			// Extra check that ensures a unique binID has been issued to each
			// item and that a matching ChunkBinItem entry exists.
			binIds := make(map[uint8]map[uint64]int)
			return st.IndexStore().Iterate(
				storage.Query{
					Factory: func() storage.Item { return &reserve.BatchRadiusItem{} },
				},
				func(res storage.Result) (bool, error) {
					item := res.Entry.(*reserve.BatchRadiusItem)
					if _, ok := binIds[item.Bin]; !ok {
						binIds[item.Bin] = make(map[uint64]int)
					}
					binIds[item.Bin][item.BinID]++
					if binIds[item.Bin][item.BinID] > 1 {
						return false, fmt.Errorf("binID %d in bin %d already used", item.BinID, item.Bin)
					}

					err := st.IndexStore().Get(&reserve.ChunkBinItem{Bin: item.Bin, BinID: item.BinID})
					if err != nil {
						return false, fmt.Errorf("check failed: chunkBinItem, bin %d, binID %d: %w", item.Bin, item.BinID, err)
					}

					return false, nil
				},
			)
		}

		err := checkBinIDs()
		if err != nil {
			logger.Info("pre-repair check failed", "error", err)
		}

		// STEP 0: remove the epoch item.
		err = st.Run(context.Background(), func(s transaction.Store) error {
			return s.IndexStore().Delete(&reserve.EpochItem{})
		})
		if err != nil {
			return err
		}

		// STEP 1: remove all BinItem entries.
		err = st.Run(context.Background(), func(s transaction.Store) error {
			for i := uint8(0); i < swarm.MaxBins; i++ {
				err := s.IndexStore().Delete(&reserve.BinItem{Bin: i})
				if err != nil {
					return err
				}
			}
			return nil
		})
		if err != nil {
			return err
		}

		logger.Info("removed all bin index entries")

		// STEP 2: remove all ChunkBinItem entries, deleting in batches.
		var chunkBinItems []*reserve.ChunkBinItem
		err = st.IndexStore().Iterate(
			storage.Query{
				Factory: func() storage.Item { return &reserve.ChunkBinItem{} },
			},
			func(res storage.Result) (bool, error) {
				item := res.Entry.(*reserve.ChunkBinItem)
				chunkBinItems = append(chunkBinItems, item)
				return false, nil
			},
		)
		if err != nil {
			return err
		}

		batchSize := 1000

		for i := 0; i < len(chunkBinItems); i += batchSize {
			end := i + batchSize
			if end > len(chunkBinItems) {
				end = len(chunkBinItems)
			}
			err := st.Run(context.Background(), func(s transaction.Store) error {
				for _, item := range chunkBinItems[i:end] {
					err := s.IndexStore().Delete(item)
					if err != nil {
						return err
					}
				}
				return nil
			})
			if err != nil {
				return err
			}
		}
		logger.Info("removed all chunk bin items", "total_entries", len(chunkBinItems))
		chunkBinItems = nil

		// STEP 3: iterate over BatchRadiusItem entries, issue fresh binIDs and
		// recreate the BatchRadiusItem and ChunkBinItem indexes; chunks that
		// are missing or invalid are removed from the chunkstore.
		var batchRadiusItems []*reserve.BatchRadiusItem
		err = st.IndexStore().Iterate(
			storage.Query{
				Factory: func() storage.Item { return &reserve.BatchRadiusItem{} },
			},
			func(res storage.Result) (bool, error) {
				item := res.Entry.(*reserve.BatchRadiusItem)
				batchRadiusItems = append(batchRadiusItems, item)

				return false, nil
			},
		)
		if err != nil {
			return err
		}

		logger.Info("counted all batch radius entries", "total_entries", len(batchRadiusItems))

		var missingChunks atomic.Int64
		var invalidSharkyChunks atomic.Int64

		var bins [swarm.MaxBins]uint64
		var mtx sync.Mutex
		newID := func(bin int) uint64 {
			mtx.Lock()
			defer mtx.Unlock()

			bins[bin]++
			return bins[bin]
		}

		var eg errgroup.Group

		p := runtime.NumCPU()
		eg.SetLimit(p)

		logger.Info("parallel workers", "count", p)

		for _, item := range batchRadiusItems {
			// Capture the loop variable for the goroutine.
			func(item *reserve.BatchRadiusItem) {
				eg.Go(func() error {
					return st.Run(context.Background(), func(s transaction.Store) error {
						chunk, err := s.ChunkStore().Get(context.Background(), item.Address)
						if err != nil {
							if errors.Is(err, storage.ErrNotFound) {
								missingChunks.Add(1)
								return reserve.RemoveChunkWithItem(context.Background(), s, item)
							}
							return err
						}

						chunkType := chunkTypeFunc(chunk)
						if chunkType == swarm.ChunkTypeUnspecified {
							invalidSharkyChunks.Add(1)
							return reserve.RemoveChunkWithItem(context.Background(), s, item)
						}

						item.BinID = newID(int(item.Bin))
						if bytes.Equal(item.StampHash, swarm.EmptyAddress.Bytes()) {
							stamp, err := chunkstamp.LoadWithBatchID(s.IndexStore(), "reserve", item.Address, item.BatchID)
							if err != nil {
								return err
							}
							stampHash, err := stamp.Hash()
							if err != nil {
								return err
							}
							item.StampHash = stampHash
						}

						err = s.IndexStore().Put(item)
						if err != nil {
							return err
						}

						return s.IndexStore().Put(&reserve.ChunkBinItem{
							BatchID:   item.BatchID,
							Bin:       item.Bin,
							Address:   item.Address,
							BinID:     item.BinID,
							StampHash: item.StampHash,
							ChunkType: chunkType,
						})
					})
				})
			}(item)
		}

		err = eg.Wait()
		if err != nil {
			return err
		}

		// STEP 4: persist the latest binID issued for each bin.
		err = st.Run(context.Background(), func(s transaction.Store) error {
			for bin, id := range bins {
				err := s.IndexStore().Put(&reserve.BinItem{Bin: uint8(bin), BinID: id})
				if err != nil {
					return err
				}
			}
			return nil
		})
		if err != nil {
			return err
		}

		err = checkBinIDs()
		if err != nil {
			return err
		}

		batchRadiusCnt, err := st.IndexStore().Count(&reserve.BatchRadiusItem{})
		if err != nil {
			return err
		}

		chunkBinCnt, err := st.IndexStore().Count(&reserve.ChunkBinItem{})
		if err != nil {
			return err
		}

		logger.Info("migrated all chunk entries", "new_size", batchRadiusCnt, "missing_chunks", missingChunks.Load(), "invalid_sharky_chunks", invalidSharkyChunks.Load())

		if batchRadiusCnt != chunkBinCnt {
			return fmt.Errorf("index counts do not match, %d vs %d", batchRadiusCnt, chunkBinCnt)
		}

		return nil
	}
}
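
For orientation, ReserveRepairer performs no work when constructed; it returns a func() error closure that the storer's migration machinery runs once. Below is a minimal sketch of a caller, assumed to live in the same package so it can reuse the imports above; the runReserveRepair helper and its placeholder chunk-type classifier are illustrative and not part of this file.

// runReserveRepair is a hypothetical wrapper showing how the closure returned
// by ReserveRepairer is invoked. st and logger are assumed to be supplied by
// the surrounding storer setup.
func runReserveRepair(st transaction.Storage, logger log.Logger) error {
	// Placeholder classifier: a real caller validates each chunk and returns
	// its type; anything reported as swarm.ChunkTypeUnspecified is treated as
	// invalid by the repairer and evicted from the chunkstore.
	chunkType := func(_ swarm.Chunk) swarm.ChunkType {
		return swarm.ChunkTypeContentAddressed
	}

	step := ReserveRepairer(st, chunkType, logger)
	if err := step(); err != nil {
		return fmt.Errorf("reserve repair migration: %w", err)
	}
	return nil
}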