github.com/ethersphere/bee/v2@v2.2.0/pkg/file/pipeline/builder/builder.go

// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package builder

import (
	"context"
	"errors"
	"fmt"
	"io"

	"github.com/ethersphere/bee/v2/pkg/encryption"
	"github.com/ethersphere/bee/v2/pkg/file/pipeline"
	"github.com/ethersphere/bee/v2/pkg/file/pipeline/bmt"
	enc "github.com/ethersphere/bee/v2/pkg/file/pipeline/encryption"
	"github.com/ethersphere/bee/v2/pkg/file/pipeline/feeder"
	"github.com/ethersphere/bee/v2/pkg/file/pipeline/hashtrie"
	"github.com/ethersphere/bee/v2/pkg/file/pipeline/store"
	"github.com/ethersphere/bee/v2/pkg/file/redundancy"
	storage "github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/swarm"
)

// NewPipelineBuilder returns the appropriate pipeline for the given
// parameters: an encrypting pipeline when encrypt is true, otherwise a
// plain hashing pipeline.
func NewPipelineBuilder(ctx context.Context, s storage.Putter, encrypt bool, rLevel redundancy.Level) pipeline.Interface {
	if encrypt {
		return newEncryptionPipeline(ctx, s, rLevel)
	}
	return newPipeline(ctx, s, rLevel)
}
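
// The builder only wires writers together; nothing is hashed or stored until
// the returned pipeline is written to. The sketch below shows direct use of a
// plain pipeline. It is illustrative rather than part of the original file:
// discardPutter is a hypothetical no-op implementation of storage.Putter kept
// here only to make the example self-contained, and redundancy.NONE is
// assumed to be the zero redundancy level.
type discardPutter struct{}

// Put drops the chunk; a real Putter would persist it.
func (discardPutter) Put(context.Context, swarm.Chunk) error { return nil }

func ExampleNewPipelineBuilder() {
	ctx := context.Background()
	p := NewPipelineBuilder(ctx, discardPutter{}, false, redundancy.NONE)
	// Write feeds content into the pipeline; chunking happens internally.
	if _, err := p.Write([]byte("hello swarm")); err != nil {
		panic(err)
	}
	// Sum flushes any buffered data and returns the BMT root hash.
	sum, err := p.Sum()
	if err != nil {
		panic(err)
	}
	_ = swarm.NewAddress(sum) // the content address of the payload
}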

// newPipeline creates a standard pipeline that only hashes content with BMT
// to create a Merkle tree of hashes representing the given arbitrary-size
// byte stream. Partial writes are supported. The pipeline flow is:
// Data -> Feeder -> BMT -> Storage -> HashTrie.
func newPipeline(ctx context.Context, s storage.Putter, rLevel redundancy.Level) pipeline.Interface {
	shortPipelineFn := newShortPipelineFunc(ctx, s)
	tw := hashtrie.NewHashTrieWriter(
		ctx,
		swarm.HashSize,
		redundancy.New(rLevel, false, shortPipelineFn),
		shortPipelineFn,
		s,
	)
	lsw := store.NewStoreWriter(ctx, s, tw)
	b := bmt.NewBmtWriter(lsw)
	return feeder.NewChunkFeederWriter(swarm.ChunkSize, b)
}

// newShortPipelineFunc returns a constructor for the ephemeral hashing
// pipeline (BMT -> Storage) that the hashTrieWriter uses to hash and store
// intermediate (parent) chunks.
func newShortPipelineFunc(ctx context.Context, s storage.Putter) func() pipeline.ChainWriter {
	return func() pipeline.ChainWriter {
		lsw := store.NewStoreWriter(ctx, s, nil)
		return bmt.NewBmtWriter(lsw)
	}
}
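
// Note that the short pipeline omits the feeder and hashTrie stages: the
// hashTrieWriter invokes it on parent chunks it has already assembled, each
// at most swarm.ChunkSize bytes, so no further chunking or tree-building is
// needed.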

// newEncryptionPipeline creates an encryption pipeline that encrypts chunk
// data using CTR mode and hashes it with BMT to create a Merkle tree of
// hashes representing the given arbitrary-size byte stream. Partial writes
// are supported. The pipeline flow is:
// Data -> Feeder -> Encryption -> BMT -> Storage -> HashTrie.
// Note that the encryption writer will mutate the data to contain the
// encrypted span, but the span field with the unencrypted span is preserved.
func newEncryptionPipeline(ctx context.Context, s storage.Putter, rLevel redundancy.Level) pipeline.Interface {
	tw := hashtrie.NewHashTrieWriter(
		ctx,
		swarm.HashSize+encryption.KeyLength,
		redundancy.New(rLevel, true, newShortPipelineFunc(ctx, s)),
		newShortEncryptionPipelineFunc(ctx, s),
		s,
	)
	lsw := store.NewStoreWriter(ctx, s, tw)
	b := bmt.NewBmtWriter(lsw)
	ew := enc.NewEncryptionWriter(encryption.NewChunkEncrypter(), b)
	return feeder.NewChunkFeederWriter(swarm.ChunkSize, ew)
}
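
// The two pipelines produce references of different lengths: a plain BMT
// reference is swarm.HashSize (32) bytes, while an encrypted reference also
// carries the decryption key, for swarm.HashSize+encryption.KeyLength (64)
// bytes, which is why the hashTrieWriter above is constructed with the larger
// reference size. A minimal illustrative helper (hypothetical, not part of
// the original file):
func refSize(encrypted bool) int {
	if encrypted {
		return swarm.HashSize + encryption.KeyLength
	}
	return swarm.HashSize
}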

// newShortEncryptionPipelineFunc returns a constructor for the ephemeral
// encrypting pipeline (Encryption -> BMT -> Storage) that the hashTrieWriter
// uses to hash and store intermediate (parent) chunks.
func newShortEncryptionPipelineFunc(ctx context.Context, s storage.Putter) func() pipeline.ChainWriter {
	return func() pipeline.ChainWriter {
		lsw := store.NewStoreWriter(ctx, s, nil)
		b := bmt.NewBmtWriter(lsw)
		return enc.NewEncryptionWriter(encryption.NewChunkEncrypter(), b)
	}
}

// FeedPipeline feeds the pipeline with the given reader until EOF is reached.
// It returns the cryptographic root hash of the content.
func FeedPipeline(ctx context.Context, pipeline pipeline.Interface, r io.Reader) (swarm.Address, error) {
	data := make([]byte, swarm.ChunkSize)
	for {
		c, err := r.Read(data)
		if err != nil {
			if errors.Is(err, io.EOF) {
				// flush any bytes returned together with io.EOF
				if c > 0 {
					cc, err := pipeline.Write(data[:c])
					if err != nil {
						return swarm.ZeroAddress, err
					}
					if cc < c {
						return swarm.ZeroAddress, fmt.Errorf("pipeline short write: wrote %d bytes, expected %d", cc, c)
					}
				}
				break
			}
			return swarm.ZeroAddress, err
		}
		cc, err := pipeline.Write(data[:c])
		if err != nil {
			return swarm.ZeroAddress, err
		}
		if cc < c {
			return swarm.ZeroAddress, fmt.Errorf("pipeline short write: wrote %d bytes, expected %d", cc, c)
		}
		// bail out early if the caller cancelled the context
		select {
		case <-ctx.Done():
			return swarm.ZeroAddress, ctx.Err()
		default:
		}
	}
	select {
	case <-ctx.Done():
		return swarm.ZeroAddress, ctx.Err()
	default:
	}

	sum, err := pipeline.Sum()
	if err != nil {
		return swarm.ZeroAddress, err
	}

	return swarm.NewAddress(sum), nil
}
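
// A minimal end-to-end sketch (not part of the original file): build a plain
// pipeline over the hypothetical discardPutter defined above and feed it a
// payload through FeedPipeline. oneShotReader stands in for bytes.NewReader
// so the sketch needs no extra imports.
type oneShotReader struct{ data []byte }

// Read copies out the remaining payload and reports io.EOF once drained.
func (r *oneShotReader) Read(p []byte) (int, error) {
	if len(r.data) == 0 {
		return 0, io.EOF
	}
	n := copy(p, r.data)
	r.data = r.data[n:]
	return n, nil
}

func ExampleFeedPipeline() {
	ctx := context.Background()
	p := NewPipelineBuilder(ctx, discardPutter{}, false, redundancy.NONE)
	addr, err := FeedPipeline(ctx, p, &oneShotReader{data: []byte("hello swarm")})
	if err != nil {
		panic(err)
	}
	_ = addr // the 32-byte BMT root address of the content
}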