github.com/ethersphere/bee/v2@v2.2.0/pkg/file/redundancy/redundancy.go

// Copyright 2023 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package redundancy

import (
	"fmt"

	"github.com/ethersphere/bee/v2/pkg/file/pipeline"
	"github.com/ethersphere/bee/v2/pkg/swarm"
	"github.com/klauspost/reedsolomon"
)

// ParityChunkCallback is called when a new parity chunk has been created.
type ParityChunkCallback func(level int, span, address []byte) error
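
// A minimal callback sketch (illustrative only, not part of this package):
// collect the parity chunk addresses per level, roughly the way the hashtrie
// writer consumes them.
//
//	var parities [9][]swarm.Address
//
//	cb := func(level int, span, address []byte) error {
//		parities[level] = append(parities[level], swarm.NewAddress(address))
//		return nil
//	}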

type RedundancyParams interface {
	MaxShards() int // returns the maximum number of data shards used in an intermediate chunk
	Level() Level
	Parities(int) int
	ChunkWrite(int, []byte, ParityChunkCallback) error
	ElevateCarrierChunk(int, ParityChunkCallback) error
	Encode(int, ParityChunkCallback) error
	GetRootData() ([]byte, error)
}

type ErasureEncoder interface {
	Encode([][]byte) error
}

var erasureEncoderFunc = func(shards, parities int) (ErasureEncoder, error) {
	return reedsolomon.New(shards, parities)
}
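
// Because erasureEncoderFunc is a package-level variable, tests can swap in a
// stub encoder. A minimal sketch (mockEncoder is hypothetical, not part of
// this package):
//
//	type mockEncoder struct{}
//
//	func (mockEncoder) Encode([][]byte) error { return nil }
//
//	func useMock() {
//		erasureEncoderFunc = func(shards, parities int) (ErasureEncoder, error) {
//			return mockEncoder{}, nil
//		}
//	}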

type Params struct {
	level      Level
	pipeLine   pipeline.PipelineFunc
	buffer     [][][]byte // bytes of the chunks buffered on each level for erasure coding; [levelIndex][branchIndex][byteIndex]
	cursor     []int      // next free branchIndex on each level of buffer; reset to zero after the level is encoded
	maxShards  int        // number of data chunks after which the parity encoding is triggered
	maxParity  int        // number of parity chunks to produce when maxShards data chunks have been buffered
	encryption bool
}

func New(level Level, encryption bool, pipeLine pipeline.PipelineFunc) *Params {
	maxShards := 0
	maxParity := 0
	if encryption {
		maxShards = level.GetMaxEncShards()
		maxParity = level.GetParities(swarm.EncryptedBranches)
	} else {
		maxShards = level.GetMaxShards()
		maxParity = level.GetParities(swarm.BmtBranches)
	}
	// init the chunk buffer for erasure coding
	rsChunkLevels := 0
	if level != NONE {
		rsChunkLevels = 8
	}
	buffer := make([][][]byte, rsChunkLevels)
	for i := 0; i < rsChunkLevels; i++ {
		buffer[i] = make([][]byte, swarm.BmtBranches) // always 128 branches long, even though fewer shards fit when chunks are encrypted
	}

	return &Params{
		level:      level,
		pipeLine:   pipeLine,
		buffer:     buffer,
		cursor:     make([]int, 9), // one cursor per buffer level plus one above it (rsChunkLevels + 1)
		maxShards:  maxShards,
		maxParity:  maxParity,
		encryption: encryption,
	}
}
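
// A minimal construction sketch (illustrative; newPipeline is a hypothetical
// factory returning a pipeline.PipelineFunc):
//
//	p := New(MEDIUM, false, newPipeline())
//	shards := p.MaxShards() // data shards per full 128-branch intermediate chunk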

func (p *Params) MaxShards() int {
	return p.maxShards
}

func (p *Params) Level() Level {
	return p.level
}

func (p *Params) Parities(shards int) int {
	if p.encryption {
		return p.level.GetEncParities(shards)
	}
	return p.level.GetParities(shards)
}
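
// The parity count depends on the redundancy level and on whether the chunks
// are encrypted. A sketch that prints the parity counts for full chunks at
// each level above NONE:
//
//	for _, lvl := range []Level{MEDIUM, STRONG, INSANE, PARANOID} {
//		fmt.Println(lvl.GetParities(swarm.BmtBranches), lvl.GetEncParities(swarm.EncryptedBranches))
//	}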

// ChunkWrite caches the chunk data on the given level and, when the level
// becomes full, calls Encode.
func (p *Params) ChunkWrite(chunkLevel int, data []byte, callback ParityChunkCallback) error {
	if p.level == NONE {
		return nil
	}
	if len(data) != swarm.ChunkWithSpanSize {
		// zero-pad the chunk so that all shards passed to the encoder have equal length
		zeros := make([]byte, swarm.ChunkWithSpanSize-len(data))
		data = append(data, zeros...)
	}

	return p.chunkWrite(chunkLevel, data, callback)
}
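
// A write-side sketch showing the expected call pattern (chunks and cb are
// hypothetical variables): feed every data chunk of a level to ChunkWrite,
// then flush the partially filled level with Encode at the end.
//
//	for _, c := range chunks {
//		if err := p.ChunkWrite(0, c.Data(), cb); err != nil {
//			return err
//		}
//	}
//	if err := p.Encode(0, cb); err != nil {
//		return err
//	}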

// chunkWrite appends the chunk data to the buffer of the given level and
// triggers the parity encoding once maxShards chunks have accumulated.
func (p *Params) chunkWrite(chunkLevel int, data []byte, callback ParityChunkCallback) error {
	// append chunk to the buffer
	p.buffer[chunkLevel][p.cursor[chunkLevel]] = data
	p.cursor[chunkLevel]++

	// add parity chunks if the level is full
	if p.cursor[chunkLevel] == p.maxShards {
		// append erasure coded data
		return p.encode(chunkLevel, callback)
	}
	return nil
}

// Encode produces and stores parity chunks that are also passed back to the
// caller through the callback.
func (p *Params) Encode(chunkLevel int, callback ParityChunkCallback) error {
	if p.level == NONE || p.cursor[chunkLevel] == 0 {
		return nil
	}

	return p.encode(chunkLevel, callback)
}

func (p *Params) encode(chunkLevel int, callback ParityChunkCallback) error {
	shards := p.cursor[chunkLevel]
	parities := p.Parities(shards)

	n := shards + parities
	// calculate parity chunks
	enc, err := erasureEncoderFunc(shards, parities)
	if err != nil {
		return err
	}

	// allocate the parity shards; all shards must have the same (padded) length
	pz := len(p.buffer[chunkLevel][0])
	for i := shards; i < n; i++ {
		p.buffer[chunkLevel][i] = make([]byte, pz)
	}
	// the encoder fills the parity shards in place
	err = enc.Encode(p.buffer[chunkLevel][:n])
	if err != nil {
		return err
	}

	// write each parity chunk through the pipeline and report it to the caller
	for i := shards; i < n; i++ {
		chunkData := p.buffer[chunkLevel][i]
		span := chunkData[:swarm.SpanSize]

		writer := p.pipeLine()
		args := pipeline.PipeWriteArgs{
			Data: chunkData,
			Span: span,
		}
		err = writer.ChainWrite(&args)
		if err != nil {
			return err
		}

		err = callback(chunkLevel+1, span, args.Ref)
		if err != nil {
			return err
		}
	}
	p.cursor[chunkLevel] = 0

	return nil
}
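
// Inside encode, the encoder sees the level's buffer as n shards of equal
// (padded) length, data shards first and parity shards appended:
//
//	buffer[chunkLevel][:n] = [ d0 d1 ... d(shards-1) | p0 p1 ... p(parities-1) ]
//
// The Reed-Solomon encoder fills the parity shards in place, after which each
// parity chunk is written through a fresh pipeline and reported one level up.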

// ElevateCarrierChunk moves the lone orphan chunk of a level to the level
// above, where it can be encoded together with other chunks.
func (p *Params) ElevateCarrierChunk(chunkLevel int, callback ParityChunkCallback) error {
	if p.level == NONE {
		return nil
	}
	if p.cursor[chunkLevel] != 1 {
		return fmt.Errorf("redundancy: cannot elevate carrier chunk because it is not the only chunk on the level; the level has %d chunks", p.cursor[chunkLevel])
	}

	// not necessary to update the current level since we will not work with it anymore
	return p.chunkWrite(chunkLevel+1, p.buffer[chunkLevel][p.cursor[chunkLevel]-1], callback)
}
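
// A carrier-chunk sketch (level and cb are hypothetical variables): when a
// level closes with exactly one buffered chunk, it cannot be erasure coded
// alone, so it is carried one level up instead:
//
//	if err := p.ElevateCarrierChunk(level, cb); err != nil {
//		return err // the chunk is not alone on the level: use Encode instead
//	}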

// GetRootData returns the topmost chunk in the tree.
// It returns an error if the encoding has not yet finished in the BMT
// or if redundancy is not used in the BMT.
func (p *Params) GetRootData() ([]byte, error) {
	if p.level == NONE {
		return nil, fmt.Errorf("redundancy: no redundancy level is used for the file in order to cache root data")
	}
	lastBuffer := p.buffer[len(p.buffer)-1]
	if len(lastBuffer[0]) != swarm.ChunkWithSpanSize {
		return nil, fmt.Errorf("redundancy: hashtrie sum has not finished in order to cache root data")
	}
	return lastBuffer[0], nil
}
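
// A retrieval sketch: once the hashtrie sum has finished, the caller can read
// back the root chunk's raw bytes (span prefix followed by payload):
//
//	root, err := p.GetRootData()
//	if err != nil {
//		return err
//	}
//	span := root[:swarm.SpanSize] // the root chunk's span prefix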