github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/migration/reserveRepair.go (about)

     1  // Copyright 2023 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package migration
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"errors"
    11  	"fmt"
    12  	"runtime"
    13  	"sync"
    14  	"sync/atomic"
    15  
    16  	"github.com/ethersphere/bee/v2/pkg/log"
    17  	"github.com/ethersphere/bee/v2/pkg/storage"
    18  	"github.com/ethersphere/bee/v2/pkg/storer/internal/chunkstamp"
    19  	"github.com/ethersphere/bee/v2/pkg/storer/internal/reserve"
    20  	"github.com/ethersphere/bee/v2/pkg/storer/internal/transaction"
    21  	"github.com/ethersphere/bee/v2/pkg/swarm"
    22  	"golang.org/x/sync/errgroup"
    23  )
    24  
    25  // ReserveRepairer is a migration step that removes all BinItem entries and migrates
    26  // ChunkBinItem and BatchRadiusItem entries to use a new BinID field.
    27  func ReserveRepairer(
    28  	st transaction.Storage,
    29  	chunkTypeFunc func(swarm.Chunk) swarm.ChunkType,
    30  	logger log.Logger,
    31  ) func() error {
    32  	return func() error {
    33  		/*
    34  			STEP 0:	remove epoch item
    35  			STEP 1:	remove all of the BinItem entries
    36  			STEP 2:	remove all of the ChunkBinItem entries
    37  			STEP 3:	iterate BatchRadiusItem, get new binID
    38  					create new ChunkBinItem and BatchRadiusItem if the chunk exists in the chunkstore
    39  					if the chunk is invalid, it is removed from the chunkstore
    40  			STEP 4: save the latest binID to disk
    41  		*/
    42  
    43  		logger.Info("starting reserve repair tool, do not interrupt or kill the process...")
    44  
    45  		checkBinIDs := func() error {
    46  			// extra test that ensure that a unique binID has been issed to each item.
    47  			binIds := make(map[uint8]map[uint64]int)
    48  			return st.IndexStore().Iterate(
    49  				storage.Query{
    50  					Factory: func() storage.Item { return &reserve.BatchRadiusItem{} },
    51  				},
    52  				func(res storage.Result) (bool, error) {
    53  					item := res.Entry.(*reserve.BatchRadiusItem)
    54  					if _, ok := binIds[item.Bin]; !ok {
    55  						binIds[item.Bin] = make(map[uint64]int)
    56  					}
    57  					binIds[item.Bin][item.BinID]++
    58  					if binIds[item.Bin][item.BinID] > 1 {
    59  						return false, fmt.Errorf("binID %d in bin %d already used", item.BinID, item.Bin)
    60  					}
    61  
    62  					err := st.IndexStore().Get(&reserve.ChunkBinItem{Bin: item.Bin, BinID: item.BinID})
    63  					if err != nil {
    64  						return false, fmt.Errorf("check failed: chunkBinItem, bin %d, binID %d: %w", item.Bin, item.BinID, err)
    65  					}
    66  
    67  					return false, nil
    68  				},
    69  			)
    70  		}
    71  
    72  		err := checkBinIDs()
    73  		if err != nil {
    74  			logger.Info("pre-repair check failed", "error", err)
    75  		}
    76  
    77  		// STEP 0
    78  		err = st.Run(context.Background(), func(s transaction.Store) error {
    79  			return s.IndexStore().Delete(&reserve.EpochItem{})
    80  		})
    81  		if err != nil {
    82  			return err
    83  		}
    84  
    85  		// STEP 1
    86  		err = st.Run(context.Background(), func(s transaction.Store) error {
    87  			for i := uint8(0); i < swarm.MaxBins; i++ {
    88  				err := s.IndexStore().Delete(&reserve.BinItem{Bin: i})
    89  				if err != nil {
    90  					return err
    91  				}
    92  			}
    93  			return nil
    94  		})
    95  		if err != nil {
    96  			return err
    97  		}
    98  
    99  		logger.Info("removed all bin index entries")
   100  
   101  		// STEP 2
   102  		var chunkBinItems []*reserve.ChunkBinItem
   103  		err = st.IndexStore().Iterate(
   104  			storage.Query{
   105  				Factory: func() storage.Item { return &reserve.ChunkBinItem{} },
   106  			},
   107  			func(res storage.Result) (bool, error) {
   108  				item := res.Entry.(*reserve.ChunkBinItem)
   109  				chunkBinItems = append(chunkBinItems, item)
   110  				return false, nil
   111  			},
   112  		)
   113  		if err != nil {
   114  			return err
   115  		}
   116  
   117  		batchSize := 1000
   118  
   119  		for i := 0; i < len(chunkBinItems); i += batchSize {
   120  			end := i + batchSize
   121  			if end > len(chunkBinItems) {
   122  				end = len(chunkBinItems)
   123  			}
   124  			err := st.Run(context.Background(), func(s transaction.Store) error {
   125  				for _, item := range chunkBinItems[i:end] {
   126  					err := s.IndexStore().Delete(item)
   127  					if err != nil {
   128  						return err
   129  					}
   130  				}
   131  				return nil
   132  			})
   133  			if err != nil {
   134  				return err
   135  			}
   136  		}
   137  		logger.Info("removed all chunk bin items", "total_entries", len(chunkBinItems))
   138  		chunkBinItems = nil
   139  
   140  		// STEP 3
   141  		var batchRadiusItems []*reserve.BatchRadiusItem
   142  		err = st.IndexStore().Iterate(
   143  			storage.Query{
   144  				Factory: func() storage.Item { return &reserve.BatchRadiusItem{} },
   145  			},
   146  			func(res storage.Result) (bool, error) {
   147  				item := res.Entry.(*reserve.BatchRadiusItem)
   148  				batchRadiusItems = append(batchRadiusItems, item)
   149  
   150  				return false, nil
   151  			},
   152  		)
   153  		if err != nil {
   154  			return err
   155  		}
   156  
   157  		logger.Info("counted all batch radius entries", "total_entries", len(batchRadiusItems))
   158  
   159  		var missingChunks atomic.Int64
   160  		var invalidSharkyChunks atomic.Int64
   161  
   162  		var bins [swarm.MaxBins]uint64
   163  		var mtx sync.Mutex
   164  		newID := func(bin int) uint64 {
   165  			mtx.Lock()
   166  			defer mtx.Unlock()
   167  
   168  			bins[bin]++
   169  			return bins[bin]
   170  		}
   171  
   172  		var eg errgroup.Group
   173  
   174  		p := runtime.NumCPU()
   175  		eg.SetLimit(p)
   176  
   177  		logger.Info("parallel workers", "count", p)
   178  
   179  		for _, item := range batchRadiusItems {
   180  			func(item *reserve.BatchRadiusItem) {
   181  				eg.Go(func() error {
   182  
   183  					return st.Run(context.Background(), func(s transaction.Store) error {
   184  
   185  						chunk, err := s.ChunkStore().Get(context.Background(), item.Address)
   186  						if err != nil {
   187  							if errors.Is(err, storage.ErrNotFound) {
   188  								missingChunks.Add(1)
   189  								return reserve.RemoveChunkWithItem(context.Background(), s, item)
   190  							}
   191  							return err
   192  						}
   193  
   194  						chunkType := chunkTypeFunc(chunk)
   195  						if chunkType == swarm.ChunkTypeUnspecified {
   196  							invalidSharkyChunks.Add(1)
   197  							return reserve.RemoveChunkWithItem(context.Background(), s, item)
   198  						}
   199  
   200  						item.BinID = newID(int(item.Bin))
   201  						if bytes.Equal(item.StampHash, swarm.EmptyAddress.Bytes()) {
   202  							stamp, err := chunkstamp.LoadWithBatchID(s.IndexStore(), "reserve", item.Address, item.BatchID)
   203  							if err != nil {
   204  								return err
   205  							}
   206  							stampHash, err := stamp.Hash()
   207  							if err != nil {
   208  								return err
   209  							}
   210  							item.StampHash = stampHash
   211  						}
   212  
   213  						err = s.IndexStore().Put(item)
   214  						if err != nil {
   215  							return err
   216  						}
   217  
   218  						return s.IndexStore().Put(&reserve.ChunkBinItem{
   219  							BatchID:   item.BatchID,
   220  							Bin:       item.Bin,
   221  							Address:   item.Address,
   222  							BinID:     item.BinID,
   223  							StampHash: item.StampHash,
   224  							ChunkType: chunkType,
   225  						})
   226  					})
   227  				})
   228  			}(item)
   229  		}
   230  
   231  		err = eg.Wait()
   232  		if err != nil {
   233  			return err
   234  		}
   235  
   236  		// STEP 4
   237  		err = st.Run(context.Background(), func(s transaction.Store) error {
   238  			for bin, id := range bins {
   239  				err := s.IndexStore().Put(&reserve.BinItem{Bin: uint8(bin), BinID: id})
   240  				if err != nil {
   241  					return err
   242  				}
   243  			}
   244  			return nil
   245  		})
   246  		if err != nil {
   247  			return err
   248  		}
   249  
   250  		err = checkBinIDs()
   251  		if err != nil {
   252  			return err
   253  		}
   254  
   255  		batchRadiusCnt, err := st.IndexStore().Count(&reserve.BatchRadiusItem{})
   256  		if err != nil {
   257  			return err
   258  		}
   259  
   260  		chunkBinCnt, err := st.IndexStore().Count(&reserve.ChunkBinItem{})
   261  		if err != nil {
   262  			return err
   263  		}
   264  
   265  		logger.Info("migrated all chunk entries", "new_size", batchRadiusCnt, "missing_chunks", missingChunks.Load(), "invalid_sharky_chunks", invalidSharkyChunks.Load())
   266  
   267  		if batchRadiusCnt != chunkBinCnt {
   268  			return fmt.Errorf("index counts do not match, %d vs %d", batchRadiusCnt, chunkBinCnt)
   269  		}
   270  
   271  		return nil
   272  	}
   273  }