github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/compact.go

// Copyright 2023 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package storer

import (
	"context"
	"errors"
	"fmt"
	"path"
	"sort"
	"time"

	"github.com/ethersphere/bee/v2/pkg/sharky"
	"github.com/ethersphere/bee/v2/pkg/storer/internal/chunkstore"
	"github.com/ethersphere/bee/v2/pkg/swarm"
)

// Compact minimizes sharky disk usage. Using the current sharky locations from the storer,
// it relocates chunks from the end of the used slots to the first available slots.
func Compact(ctx context.Context, basePath string, opts *Options, validate bool) error {
	logger := opts.Logger

	store, err := initStore(basePath, opts)
	if err != nil {
		return fmt.Errorf("failed creating levelDB index store: %w", err)
	}
	defer func() {
		if err := store.Close(); err != nil {
			logger.Error(err, "failed closing store")
		}
	}()

	sharkyRecover, err := sharky.NewRecovery(path.Join(basePath, sharkyPath), sharkyNoOfShards, swarm.SocMaxChunkSize)
	if err != nil {
		return err
	}
	defer func() {
		if err := sharkyRecover.Close(); err != nil {
			logger.Error(err, "failed closing sharky recovery")
		}
	}()

	if validate {
		logger.Info("performing chunk validation before compaction")
		validateWork(logger, store, sharkyRecover.Read)
	}

	logger.Info("starting compaction")

	n := time.Now()

	for shard := 0; shard < sharkyNoOfShards; shard++ {

		select {
		case <-ctx.Done():
			return errors.Join(ctx.Err(), sharkyRecover.Save())
		default:
		}

		items := make([]*chunkstore.RetrievalIndexItem, 0, 1_000_000)
		// we deliberately choose to iterate the whole store again for each shard
		// so that we do not store all the items in memory (for operators with huge localstores)
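		// The cost of this choice is one full pass over the retrieval index per shard
		// (sharkyNoOfShards passes in total), trading extra index reads for a memory
		// footprint bounded by the number of chunks in a single shard.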
		err = chunkstore.IterateItems(store, func(item *chunkstore.RetrievalIndexItem) error {
			if item.Location.Shard == uint8(shard) {
				items = append(items, item)
			}
			return nil
		})
		if err != nil {
			return fmt.Errorf("chunkstore iterate: %w", err)
		}

		sort.Slice(items, func(i, j int) bool {
			return items[i].Location.Slot < items[j].Location.Slot
		})

		if len(items) < 1 {
			return errors.New("no data to compact")
		}
		lastUsedSlot := items[len(items)-1].Location.Slot
		slots := make([]*chunkstore.RetrievalIndexItem, lastUsedSlot+1) // marks free and used slots
		for _, l := range items {
			slots[l.Location.Slot] = l
		}

		// start begins at the zero slot. The loop below will increment the position of start until a free slot is found.
		// end points to the last slot, and the loop will decrement the position of end until a used slot is found.
		// Once start and end point to free and used slots, respectively, the swap of the chunk location will occur.
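		// For example (illustrative values, not taken from a real shard): with used
		// slots {0, 2, 5}, start stops at slot 1 (the first free slot) while end starts
		// at slot 5 (the last used slot); the chunk at slot 5 is moved to slot 1, the
		// pointers advance, and the loop exits once start passes end. end then rests on
		// the last used slot (2), so the shard can be truncated at end+1.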
		start := uint32(0)
		end := lastUsedSlot

		batch := store.Batch(ctx)
		for start <= end {

			if slots[start] != nil {
				if err := sharkyRecover.Add(slots[start].Location); err != nil {
					return fmt.Errorf("sharky add: %w", err)
				}
				start++ // walk to the right until a free slot is found
				continue
			}

			if slots[end] == nil {
				end-- // walk to the left until a used slot is found
				continue
			}

			from := slots[end]
			to := sharky.Location{Slot: start, Length: from.Location.Length, Shard: from.Location.Shard}
			if err := sharkyRecover.Move(context.Background(), from.Location, to); err != nil {
				return fmt.Errorf("sharky move: %w", err)
			}
			if err := sharkyRecover.Add(to); err != nil {
				return fmt.Errorf("sharky add: %w", err)
			}

			from.Location = to
			if err := batch.Put(from); err != nil {
				return fmt.Errorf("store put: %w", err)
			}

			start++
			end--
		}

		if err := batch.Commit(); err != nil {
			return err
		}

		if err := sharkyRecover.TruncateAt(context.Background(), uint8(shard), end+1); err != nil {
			return fmt.Errorf("sharky truncate: %w", err)
		}

		logger.Info("shard truncated", "shard", fmt.Sprintf("%d/%d", shard, sharkyNoOfShards-1), "slot", end)
	}

	if err := sharkyRecover.Save(); err != nil {
		return fmt.Errorf("sharky save: %w", err)
	}

	logger.Info("compaction finished", "duration", time.Since(n))

	if validate {
		logger.Info("performing chunk validation after compaction")
		validateWork(logger, store, sharkyRecover.Read)
	}

	return nil
}
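
// compactDir is a minimal usage sketch and not part of the upstream package API:
// it shows one way Compact might be wrapped for an offline maintenance flow, for
// example a "db compact"-style command run while the node is stopped. The helper
// name and the expectation that the caller supplies fully populated Options
// (logger, capacities, and so on) are illustrative assumptions.
func compactDir(ctx context.Context, dataDir string, opts *Options) error {
	if opts == nil || opts.Logger == nil {
		return errors.New("compact: options with a logger are required")
	}
	// Run with validation enabled so chunks are checked against their references
	// both before and after relocation.
	return Compact(ctx, dataDir, opts, true)
}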