github.com/ethersphere/bee/v2@v2.2.0/pkg/file/splitter/internal/job.go

     1  // Copyright 2020 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package internal
     6  
     7  import (
     8  	"context"
     9  	"encoding/binary"
    10  	"errors"
    11  	"fmt"
    12  
    13  	"github.com/ethersphere/bee/v2/pkg/cac"
    14  	"github.com/ethersphere/bee/v2/pkg/encryption"
    15  	"github.com/ethersphere/bee/v2/pkg/file"
    16  	storage "github.com/ethersphere/bee/v2/pkg/storage"
    17  	"github.com/ethersphere/bee/v2/pkg/swarm"
    18  )
    19  
    20  // maximum number of file tree levels this file hasher component can handle
    21  // (128 ^ (9 - 1)) * 4096 = 295147905179352825856 bytes
    22  const levelBufferLimit = 9
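
        // A worked version of the calculation above: each of the 8 intermediate fan-outs
        // packs 128 references, over 4096-byte data chunks, so the largest representable
        // payload is
        //
        //	128^8 * 4096 bytes = 2^56 * 2^12 bytes = 2^68 bytes = 295147905179352825856 bytes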
    23  
    24  // SimpleSplitterJob encapsulates a single splitter operation, accepting blockwise
    25  // writes of data whose length is defined in advance.
    26  //
    27  // After the job is constructed, Write must be called with byte slices of at most
    28  // ChunkSize bytes until the full data length has been written. Sum should then be
    29  // called, which will return the Swarm hash of the data.
    30  //
    31  // Calling Sum before the last Write, or Write after Sum has been called, may result
    32  // in an error and in an undefined result.
    33  type SimpleSplitterJob struct {
    34  	ctx        context.Context
    35  	putter     storage.Putter
    36  	spanLength int64  // target length of data
    37  	length     int64  // number of bytes written to the data level of the hasher
    38  	sumCounts  []int  // number of sums performed, indexed per level
    39  	cursors    []int  // section write position, indexed per level
    40  	buffer     []byte // keeps data and hashes, indexed by cursors
    41  	toEncrypt  bool   // whether to encrypt the chunks
    42  	refSize    int64  // length of a chunk reference: swarm.HashSize, plus encryption.KeyLength when encrypting
    43  }
    44  
    45  // NewSimpleSplitterJob creates a new SimpleSplitterJob.
    46  //
    47  // The spanLength is the length of the data that will be written.
    48  func NewSimpleSplitterJob(ctx context.Context, putter storage.Putter, spanLength int64, toEncrypt bool) *SimpleSplitterJob {
    49  	hashSize := swarm.HashSize
    50  	refSize := int64(hashSize)
    51  	if toEncrypt {
    52  		refSize += encryption.KeyLength
    53  	}
    54  
    55  	return &SimpleSplitterJob{
    56  		ctx:        ctx,
    57  		putter:     putter,
    58  		spanLength: spanLength,
    59  		sumCounts:  make([]int, levelBufferLimit),
    60  		cursors:    make([]int, levelBufferLimit),
    61  		buffer:     make([]byte, swarm.ChunkWithSpanSize*levelBufferLimit*2), // double size as temp workaround for weak calculation of needed buffer space
    62  		toEncrypt:  toEncrypt,
    63  		refSize:    refSize,
    64  	}
    65  }
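
        // A minimal usage sketch, not part of the original file: putter and data are
        // placeholder values, and the enclosing function is assumed to return
        // (swarm.Address, error). Write is fed at most swarm.ChunkSize bytes at a time
        // until spanLength bytes have been written, after which Sum yields the root
        // reference.
        //
        //	job := NewSimpleSplitterJob(ctx, putter, int64(len(data)), false)
        //	for i := 0; i < len(data); i += swarm.ChunkSize {
        //		end := i + swarm.ChunkSize
        //		if end > len(data) {
        //			end = len(data)
        //		}
        //		if _, err := job.Write(data[i:end]); err != nil {
        //			return swarm.ZeroAddress, err
        //		}
        //	}
        //	rootAddr := swarm.NewAddress(job.Sum(nil))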
    66  
    67  // Write adds data to the file splitter.
    68  func (j *SimpleSplitterJob) Write(b []byte) (int, error) {
    69  	if len(b) > swarm.ChunkSize {
    70  		return 0, fmt.Errorf("Write must be called with a maximum of %d bytes", swarm.ChunkSize)
    71  	}
    72  	j.length += int64(len(b))
    73  	if j.length > j.spanLength {
    74  		return 0, errors.New("write past span length")
    75  	}
    76  
    77  	err := j.writeToLevel(0, b)
    78  	if err != nil {
    79  		return 0, err
    80  	}
    81  	if j.length == j.spanLength {
    82  		err := j.hashUnfinished()
    83  		if err != nil {
    84  			return 0, file.NewHashError(err)
    85  		}
    86  		err = j.moveDanglingChunk()
    87  		if err != nil {
    88  			return 0, file.NewHashError(err)
    89  		}
    90  
    91  	}
    92  	return len(b), nil
    93  }
    94  
    95  // Sum returns the Swarm hash of the data.
    96  func (j *SimpleSplitterJob) Sum(b []byte) []byte {
    97  	return j.digest()
    98  }
    99  
   100  // writeToLevel writes to the data buffer on the specified level.
   101  // It calls sum if a chunk boundary is reached, and recursively calls this function
   102  // for the next level with the acquired BMT hash.
   103  //
   104  // It adjusts the relevant levels' cursors accordingly.
   105  func (s *SimpleSplitterJob) writeToLevel(lvl int, data []byte) error {
   106  	copy(s.buffer[s.cursors[lvl]:s.cursors[lvl]+len(data)], data)
   107  	s.cursors[lvl] += len(data)
   108  	if s.cursors[lvl]-s.cursors[lvl+1] == swarm.ChunkSize {
   109  		ref, err := s.sumLevel(lvl)
   110  		if err != nil {
   111  			return err
   112  		}
   113  		err = s.writeToLevel(lvl+1, ref)
   114  		if err != nil {
   115  			return err
   116  		}
   117  		s.cursors[lvl] = s.cursors[lvl+1]
   118  	}
   119  	return nil
   120  }
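
        // A short trace of the cursor bookkeeping above, assuming the unencrypted
        // reference size of 32 bytes (all cursors start at zero):
        //
        //	first 4096 data bytes written to level 0  ->  cursors[0] = 4096
        //	boundary reached: sumLevel(0) hashes buffer[0:4096]
        //	32-byte reference written to level 1      ->  cursors[1] = 32
        //	cursors[0] reset to cursors[1]            ->  cursors[0] = 32
        //
        // Level 0 therefore resumes writing at offset 32, directly after the stored
        // reference, and level 1 accumulates references until it reaches a chunk
        // boundary of its own.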
   121  
   122  // sumLevel calculates and returns the BMT sum of the last written data on the level.
   123  //
   124  // TODO: error handling on store write fail
   125  func (s *SimpleSplitterJob) sumLevel(lvl int) ([]byte, error) {
   126  	s.sumCounts[lvl]++
   127  	spanSize := file.Spans[lvl] * swarm.ChunkSize
   128  	span := (s.length-1)%spanSize + 1
   129  
   130  	var chunkData []byte
   131  
   132  	head := make([]byte, swarm.SpanSize)
   133  	binary.LittleEndian.PutUint64(head, uint64(span))
   134  	tail := s.buffer[s.cursors[lvl+1]:s.cursors[lvl]]
   135  	chunkData = append(head, tail...)
   136  	c := chunkData
   137  	var encryptionKey encryption.Key
   138  
   139  	if s.toEncrypt {
   140  		var err error
   141  		c, encryptionKey, err = s.encryptChunkData(chunkData)
   142  		if err != nil {
   143  			return nil, err
   144  		}
   145  	}
   146  
   147  	ch, err := cac.NewWithDataSpan(c)
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  
   152  	err = s.putter.Put(s.ctx, ch)
   153  	if err != nil {
   154  		return nil, err
   155  	}
   156  
   157  	return append(ch.Address().Bytes(), encryptionKey...), nil
   158  }
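
        // A worked example of the span calculation above, assuming file.Spans[0] == 1
        // (a level-0 chunk covers exactly one data chunk): for a 10000-byte file the
        // level-0 spanSize is 4096, and the three data chunks are summed at s.length
        // values of 4096, 8192 and 10000, giving
        //
        //	span = (4096-1)%4096 + 1  = 4096
        //	span = (8192-1)%4096 + 1  = 4096
        //	span = (10000-1)%4096 + 1 = 1808
        //
        // i.e. each chunk's span header records how many data bytes it covers, and the
        // last, partially filled chunk records only its remaining 1808 bytes.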
   159  
   160  // digest returns the calculated digest after a Sum call.
   161  //
   162  // The hash returned is the hash in the first section index of the work buffer;
   163  // this will be the root hash once all recursive sums have completed.
   164  //
   165  // The method does not check that the final hash actually has been written, so
   166  // timing is the responsibility of the caller.
   167  func (s *SimpleSplitterJob) digest() []byte {
   168  	if s.toEncrypt {
   169  		return s.buffer[:swarm.SectionSize*2]
   170  	} else {
   171  		return s.buffer[:swarm.SectionSize]
   172  	}
   173  }
   174  
   175  // hashUnfinished hashes the remaining unhashed data at the end of the data level
   176  // if the written data doesn't end on a chunk boundary.
   177  func (s *SimpleSplitterJob) hashUnfinished() error {
   178  	if s.length%swarm.ChunkSize != 0 {
   179  		ref, err := s.sumLevel(0)
   180  		if err != nil {
   181  			return err
   182  		}
   183  		copy(s.buffer[s.cursors[1]:], ref)
   184  		s.cursors[1] += len(ref)
   185  		s.cursors[0] = s.cursors[1]
   186  	}
   187  	return nil
   188  }
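
        // For example, for a 10000-byte file, s.length%swarm.ChunkSize == 10000%4096 == 1808,
        // so the final, partially filled 1808-byte chunk was never summed by writeToLevel and
        // is hashed here instead, with its reference appended to level 1.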
   189  
   190  // nolint:gofmt
   191  // moveDanglingChunk concatenates a dangling chunk's reference with the single
   192  // reference at the highest level of the tree, in the case of an otherwise balanced tree.
   193  //
   194  // Let F be full chunks (disregarding branching factor) and S be single references
   195  // in the following scenario:
   196  //
   197  //	      S
   198  //	    F   F
   199  //	  F   F   F
   200  //	F   F   F   F   S
   201  //
   202  // The result will be:
   203  //
   204  //	      SS
   205  //	    F   F
   206  //	  F   F   F
   207  //	F   F   F   F
   208  //
   209  // After which the SS will be hashed to obtain the final root hash.
   212  func (s *SimpleSplitterJob) moveDanglingChunk() error {
   213  	// calculate the total number of levels needed to represent the data (including the data level)
   214  	targetLevel := file.Levels(s.length, swarm.SectionSize, swarm.Branches)
   215  
   216  	// sum every intermediate level and write to the level above it
   217  	for i := 1; i < targetLevel; i++ {
   218  
   219  		// if there is only a single reference outside a balanced tree on this level,
   220  		// don't hash it again but pass it on to the next level
   221  		if s.sumCounts[i] > 0 {
   222  			// TODO: simplify if possible
   223  			if int64(s.sumCounts[i-1])-file.Spans[targetLevel-1-i] <= 1 {
   224  				s.cursors[i+1] = s.cursors[i]
   225  				s.cursors[i] = s.cursors[i-1]
   226  				continue
   227  			}
   228  		}
   229  
   230  		ref, err := s.sumLevel(i)
   231  		if err != nil {
   232  			return err
   233  		}
   234  		copy(s.buffer[s.cursors[i+1]:], ref)
   235  		s.cursors[i+1] += len(ref)
   236  		s.cursors[i] = s.cursors[i+1]
   237  	}
   238  	return nil
   239  }
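
        // As a concrete instance of the diagram above: a file of 128 full chunks plus one
        // partial chunk yields one full level-1 chunk and a single dangling level-0
        // reference. Rather than wrapping that lone reference in a chunk of its own at
        // every level, it is carried upwards and placed next to the balanced subtree's
        // reference, and the resulting two-reference chunk is hashed into the root.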
   240  
   241  func (s *SimpleSplitterJob) encryptChunkData(chunkData []byte) ([]byte, encryption.Key, error) {
   242  	if len(chunkData) < 8 {
   243  		return nil, nil, fmt.Errorf("invalid data, min length 8 got %v", len(chunkData))
   244  	}
   245  
   246  	key, encryptedSpan, encryptedData, err := s.encrypt(chunkData)
   247  	if err != nil {
   248  		return nil, nil, err
   249  	}
   250  	c := make([]byte, len(encryptedSpan)+len(encryptedData))
   251  	copy(c[:8], encryptedSpan)
   252  	copy(c[8:], encryptedData)
   253  	return c, key, nil
   254  }
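
        // For reference, the plaintext handed to encryptChunkData by sumLevel is laid out as
        //
        //	chunkData[:8] -> little-endian span header (number of data bytes covered by this chunk)
        //	chunkData[8:] -> payload or packed child references, up to swarm.ChunkSize bytes
        //
        // and the returned ciphertext keeps the same layout: the span and the payload are
        // encrypted separately, under the same randomly generated key, by encrypt below.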
   255  
   256  func (s *SimpleSplitterJob) encrypt(chunkData []byte) (encryption.Key, []byte, []byte, error) {
   257  	key := encryption.GenerateRandomKey(encryption.KeyLength)
   258  	encryptedSpan, err := encryption.NewSpanEncryption(key).Encrypt(chunkData[:8])
   259  	if err != nil {
   260  		return nil, nil, nil, err
   261  	}
   262  	encryptedData, err := encryption.NewDataEncryption(key).Encrypt(chunkData[8:])
   263  	if err != nil {
   264  		return nil, nil, nil, err
   265  	}
   266  	return key, encryptedSpan, encryptedData, nil
   267  }