github.com/ethersphere/bee/v2@v2.2.0/pkg/file/pipeline/hashtrie/hashtrie.go

// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package hashtrie

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"

	"github.com/ethersphere/bee/v2/pkg/file/pipeline"
	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
	"github.com/ethersphere/bee/v2/pkg/replicas"
	"github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/swarm"
)

var (
	errInconsistentRefs = errors.New("inconsistent references")
	errTrieFull         = errors.New("trie full")
)

const maxLevel = 8

type hashTrieWriter struct {
	ctx                    context.Context // context for the put function of dispersed replica chunks
	refSize                int
	cursors                []int  // level cursors, keyed by level. level 0 is the data level and holds how many chunks were processed. Intermediate higher levels will always have LOWER cursor values.
	buffer                 []byte // keeps intermediate level data
	full                   bool   // indicates whether the trie is full. currently we support (128^7)*4096 = 2305843009213693952 bytes
	pipelineFn             pipeline.PipelineFunc
	rParams                redundancy.RedundancyParams
	parityChunkFn          redundancy.ParityChunkCallback
	chunkCounters          []uint8        // counts the chunk references in intermediate chunks. key is the chunk level.
	effectiveChunkCounters []uint8        // counts the effective chunk references in intermediate chunks. key is the chunk level.
	maxChildrenChunks      uint8          // maximum number of chunk references in intermediate chunks.
	replicaPutter          storage.Putter // putter to save dispersed replicas of the root chunk
}
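
// Buffer layout sketch (illustrative note, not part of the original source;
// assumes an unencrypted 32-byte reference, so no key bytes): every entry
// appended to a level is span|ref, i.e. swarm.SpanSize+swarm.HashSize = 40
// bytes, and the pending data of a level is the buffer slice between the
// cursor of the level above and the level's own cursor:
//
//	levelData := h.buffer[h.cursors[level+1]:h.cursors[level]]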

func NewHashTrieWriter(
	ctx context.Context,
	refLen int,
	rParams redundancy.RedundancyParams,
	pipelineFn pipeline.PipelineFunc,
	replicaPutter storage.Putter,
) pipeline.ChainWriter {
	h := &hashTrieWriter{
		ctx:                    ctx,
		refSize:                refLen,
		cursors:                make([]int, 9),
		buffer:                 make([]byte, swarm.ChunkWithSpanSize*9*2), // double size as temp workaround for weak calculation of needed buffer space
		rParams:                rParams,
		pipelineFn:             pipelineFn,
		chunkCounters:          make([]uint8, 9),
		effectiveChunkCounters: make([]uint8, 9),
		maxChildrenChunks:      uint8(rParams.MaxShards() + rParams.Parities(rParams.MaxShards())),
		replicaPutter:          replicas.NewPutter(replicaPutter),
	}
	h.parityChunkFn = func(level int, span, address []byte) error {
		return h.writeToIntermediateLevel(level, true, span, address, []byte{})
	}

	return h
}
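
// Usage sketch (illustrative only; in practice the writer is wired up by the
// file pipeline builder): references written through ChainWrite accumulate in
// the trie, and Sum returns the root reference once all writes are done.
//
//	w := NewHashTrieWriter(ctx, swarm.HashSize, rParams, pipelineFn, putter)
//	if err := w.ChainWrite(&pipeline.PipeWriteArgs{Span: span, Ref: ref}); err != nil {
//		// handle error
//	}
//	root, err := w.Sum()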

// ChainWrite accepts writes of hashes from the previous writer in the chain;
// by definition these writes are on level 1.
func (h *hashTrieWriter) ChainWrite(p *pipeline.PipeWriteArgs) error {
	oneRef := h.refSize + swarm.SpanSize
	l := len(p.Span) + len(p.Ref) + len(p.Key)
	if l%oneRef != 0 || l == 0 {
		return errInconsistentRefs
	}
	if h.full {
		return errTrieFull
	}
	if h.rParams.Level() == redundancy.NONE {
		return h.writeToIntermediateLevel(1, false, p.Span, p.Ref, p.Key)
	} else {
		return h.writeToDataLevel(p.Span, p.Ref, p.Key, p.Data)
	}
}
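
// Size-check sketch (illustrative, unencrypted case): a single incoming write
// carries an 8-byte span and a 32-byte reference with no encryption key, so
// l = 40 and l%oneRef == 0 for oneRef = 32 + 8; writes whose length is not a
// whole multiple of oneRef are rejected as inconsistent.
//
//	l := swarm.SpanSize + swarm.HashSize // 8 + 32 = 40, one complete reference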

func (h *hashTrieWriter) writeToIntermediateLevel(level int, parityChunk bool, span, ref, key []byte) error {
	copy(h.buffer[h.cursors[level]:h.cursors[level]+len(span)], span)
	h.cursors[level] += len(span)
	copy(h.buffer[h.cursors[level]:h.cursors[level]+len(ref)], ref)
	h.cursors[level] += len(ref)
	copy(h.buffer[h.cursors[level]:h.cursors[level]+len(key)], key)
	h.cursors[level] += len(key)

	// update counters
	if !parityChunk {
		h.effectiveChunkCounters[level]++
	}
	h.chunkCounters[level]++
	if h.chunkCounters[level] == h.maxChildrenChunks {
		// at this point the erasure coded chunks have been written
		err := h.wrapFullLevel(level)
		return err
	}
	return nil
}

// writeToDataLevel caches data chunks and calls writeToIntermediateLevel
func (h *hashTrieWriter) writeToDataLevel(span, ref, key, data []byte) error {
	// write dataChunks to the level above
	err := h.writeToIntermediateLevel(1, false, span, ref, key)
	if err != nil {
		return err
	}

	return h.rParams.ChunkWrite(0, data, h.parityChunkFn)
}

// wrapFullLevel wraps an existing level and writes the resulting hash to the following level,
// then truncates the current level data by shifting the cursors.
// Steps are performed in the following order:
//   - take all of the data in the current level
//   - break down span and hash data
//   - sum the spans, concatenate the hashes to the buffer
//   - call the short pipeline with the span and the buffer
//   - get the hash that was created, append it one level above, and if necessary, wrap that level too
//   - remove already hashed data from buffer
//
// assumes that h.chunkCounters[level] has reached h.maxChildrenChunks for a full chunk,
// or that redundancy.Encode was called in the case of the rightmost chunks
func (h *hashTrieWriter) wrapFullLevel(level int) error {
	data := h.buffer[h.cursors[level+1]:h.cursors[level]]
	sp := uint64(0)
	var hashes []byte
	offset := 0
	for i := uint8(0); i < h.effectiveChunkCounters[level]; i++ {
		// sum up the spans of the level, then we need to bmt them and store it as a chunk
		// then write the chunk address to the next level up
		sp += binary.LittleEndian.Uint64(data[offset : offset+swarm.SpanSize])
		offset += swarm.SpanSize
		hash := data[offset : offset+h.refSize]
		offset += h.refSize
		hashes = append(hashes, hash...)
	}
	parities := 0
	for offset < len(data) {
		// we do not add the spans of parity chunks to the sum because they are gibberish
		offset += swarm.SpanSize
		hash := data[offset : offset+swarm.HashSize] // a parity reference always has hash length
		offset += swarm.HashSize
		hashes = append(hashes, hash...)
		parities++
	}
	spb := make([]byte, 8)
	binary.LittleEndian.PutUint64(spb, sp)
	if parities > 0 {
		redundancy.EncodeLevel(spb, h.rParams.Level())
	}
	hashes = append(spb, hashes...)
	writer := h.pipelineFn()
	args := pipeline.PipeWriteArgs{
		Data: hashes,
		Span: spb,
	}
	err := writer.ChainWrite(&args)
	if err != nil {
		return err
	}

	err = h.writeToIntermediateLevel(level+1, false, args.Span, args.Ref, args.Key)
	if err != nil {
		return err
	}

	err = h.rParams.ChunkWrite(level, args.Data, h.parityChunkFn)
	if err != nil {
		return err
	}

	// this "truncates" the current level that was wrapped
	// by setting the cursors to the cursors of one level above
	h.cursors[level] = h.cursors[level+1]
	h.chunkCounters[level], h.effectiveChunkCounters[level] = 0, 0
	if level+1 == 8 {
		h.full = true
	}
	return nil
}
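
// Span arithmetic sketch (illustrative): the parent span is the little-endian
// sum of the effective children's spans, so a level-1 chunk referencing 128
// full 4096-byte data chunks carries a span of 128*4096 = 524288 bytes; with
// redundancy enabled the level is additionally encoded into the span via
// redundancy.EncodeLevel.
//
//	spb := make([]byte, swarm.SpanSize)
//	binary.LittleEndian.PutUint64(spb, 128*4096)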

// Sum returns the Swarm merkle-root content-addressed hash
// of arbitrary-length binary data.
// The algorithm it uses is as follows:
//   - From level 1 up to maxLevel (8), iterate:
//     -- If the level data length equals 0 then continue to the next level
//     -- If the level data length equals 1 reference then carry the level data over to the next level
//     -- If the level data length is bigger than 1 reference then sum the level and
//     write the result to the next level
//   - Return the hash in level 8
//
// the cases are as follows:
//   - one hash in a given level, in which case we _do not_ perform a hashing operation, but just move
//     the hash to the next level, potentially resulting in a level wrap
//   - more than one hash, in which case we _do_ perform a hashing operation, appending the hash to
//     the next level
func (h *hashTrieWriter) Sum() ([]byte, error) {
	for i := 1; i < maxLevel; i++ {
		l := h.chunkCounters[i]
		switch {
		case l == 0:
			// level empty, continue to the next.
			continue
		case l == h.maxChildrenChunks:
			// this case is possible and necessary due to the carry-over
			// in the next switch case statement. normal writes done
			// through writeToIntermediateLevel will automatically wrap a full level.
			// an erasure encoding call is not necessary since ElevateCarrierChunk solves that
			err := h.wrapFullLevel(i)
			if err != nil {
				return nil, err
			}
		case l == 1:
			// this cursor assignment basically means:
			// take the hash|span|key from this level, and append it to
			// the data of the next level. you may wonder how this works:
			// every time we sum a level, the sum gets written into the next level
			// and the level cursor gets set to the next level's cursor (see the
			// truncating at the end of wrapFullLevel). there might (or not) be
			// a hash at the next level, and the cursor of the next level is
			// necessarily _smaller_ than the cursor of this level, so in fact what
			// happens is that due to the shifting of the cursors, the data of this
			// level will appear to be concatenated with the data of the next level.
			// we therefore get a "carry-over" behavior between intermediate levels
			// that might or might not have data. the eventual result is that the last
			// hash generated will always be carried over to the last level (8), then returned.
			h.cursors[i+1] = h.cursors[i]
			// elevate the cached carrier chunk to the next level as well
			err := h.rParams.ElevateCarrierChunk(i-1, h.parityChunkFn)
			if err != nil {
				return nil, err
			}
			// update counters; subtracting from the current level is not necessary
			h.effectiveChunkCounters[i+1]++
			h.chunkCounters[i+1]++
		default:
			// call erasure encoding before writing the last chunk on the level
			err := h.rParams.Encode(i-1, h.parityChunkFn)
			if err != nil {
				return nil, err
			}
			// more than one reference but fewer than maxChildrenChunks - wrap the level to the one above it
			err = h.wrapFullLevel(i)
			if err != nil {
				return nil, err
			}
		}
	}
	levelLen := h.chunkCounters[maxLevel]
	if levelLen != 1 {
		return nil, errInconsistentRefs
	}

	// return the hash in the highest level, that's all we need
	data := h.buffer[0:h.cursors[maxLevel]]
	rootHash := data[swarm.SpanSize:]

	// save dispersed replicas of the root chunk
	if h.rParams.Level() != redundancy.NONE {
		rootData, err := h.rParams.GetRootData()
		if err != nil {
			return nil, err
		}
		err = h.replicaPutter.Put(h.ctx, swarm.NewChunk(swarm.NewAddress(rootHash[:swarm.HashSize]), rootData))
		if err != nil {
			return nil, fmt.Errorf("hashtrie: cannot put dispersed replica %s", err.Error())
		}
	}
	return rootHash, nil
}