github.com/0chain/gosdk@v1.17.11/zboxcore/sdk/chunked_upload_chunk_reader.go

package sdk

import (
	"io"
	"math"
	"strconv"
	"sync"

	"github.com/0chain/errors"
	"github.com/0chain/gosdk/constants"
	"github.com/0chain/gosdk/zboxcore/encryption"
	"github.com/0chain/gosdk/zboxcore/logger"
	"github.com/0chain/gosdk/zboxcore/zboxutil"
	"github.com/klauspost/reedsolomon"
	"github.com/valyala/bytebufferpool"
)

// pooled byte buffers, reused across uploads to cut per-upload allocations
var (
	uploadPool   bytebufferpool.Pool
	formDataPool bytebufferpool.Pool
)

type ChunkedUploadChunkReader interface {
	// Next reads, encodes and encrypts the next chunk
	Next() (*ChunkData, error)

	// Read reads, encodes and encrypts all bytes in buf
	Read(buf []byte) ([][]byte, error)

	// Close closes the hasher data channel and releases the pooled buffer
	Close()
	// GetFileHash returns the hash of the whole file
	GetFileHash() (string, error)
	// Reset resets the buffer offset
	Reset()
}
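
// A minimal read-loop sketch for the interface above (illustrative only;
// reader is assumed to be built by createChunkReader below):
//
//	for {
//		chunk, err := reader.Next()
//		if err != nil {
//			return err
//		}
//		// send chunk.Fragments[i] to blobber i here
//		if chunk.IsFinal {
//			break
//		}
//	}
//	fileHash, _ := reader.GetFileHash() // also closes the hasher pipeline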

// chunkedUploadChunkReader reads chunk bytes from an io.Reader. See the details at https://github.com/0chain/blobber/wiki/Protocols#what-is-fixedmerkletree
type chunkedUploadChunkReader struct {
	fileReader io.Reader

	// size total size of the source. 0 means we don't know it.
	size int64
	// readSize total size read from the source so far
	readSize int64

	// chunkSize chunk size including the encryption header
	chunkSize int64

	// chunkHeaderSize encryption header size
	chunkHeaderSize int64
	// chunkDataSize data size without the encryption header in a chunk. It is the same as chunkSize if encryptOnUpload is false.
	chunkDataSize int64

	// chunkDataSizePerRead total size to read from the original io.Reader per chunk. It is chunkDataSize * DataShards.
	chunkDataSizePerRead int64

	// totalChunkDataSizePerRead total size of data in a chunk. It is chunkDataSize * (DataShards + ParityShards).
	totalChunkDataSizePerRead int64

	// fileShardsDataBuffer pooled buffer that backs the shard data for a batch of chunks
	fileShardsDataBuffer *bytebufferpool.ByteBuffer

	// offset current offset into fileShardsDataBuffer
	offset      int64
	chunkNumber int64

	// nextChunkIndex next index for reading
	nextChunkIndex int

	dataShards int

	// encryptOnUpload encrypt data on upload
	encryptOnUpload bool

	uploadMask zboxutil.Uint128
	// erasureEncoder erasure encoder
	erasureEncoder reedsolomon.Encoder
	// encscheme encryption scheme
	encscheme encryption.EncryptionScheme
	// hasher to calculate the actual file hash, validation root and fixed merkle root
	hasher         Hasher
	hasherDataChan chan []byte
	hasherError    error
	hasherWG       sync.WaitGroup
	closeOnce      sync.Once
}

// createChunkReader creates a ChunkedUploadChunkReader instance
func createChunkReader(fileReader io.Reader, size, chunkSize int64, dataShards, parityShards int, encryptOnUpload bool, uploadMask zboxutil.Uint128, erasureEncoder reedsolomon.Encoder, encscheme encryption.EncryptionScheme, hasher Hasher, chunkNumber int) (ChunkedUploadChunkReader, error) {

	if chunkSize <= 0 {
		return nil, errors.Throw(constants.ErrInvalidParameter, "chunkSize: "+strconv.FormatInt(chunkSize, 10))
	}

	if dataShards <= 0 {
		return nil, errors.Throw(constants.ErrInvalidParameter, "dataShards: "+strconv.Itoa(dataShards))
	}

	if erasureEncoder == nil {
		return nil, errors.Throw(constants.ErrInvalidParameter, "erasureEncoder")
	}

	if hasher == nil {
		return nil, errors.Throw(constants.ErrInvalidParameter, "hasher")
	}

	r := &chunkedUploadChunkReader{
		fileReader:      fileReader,
		size:            size,
		chunkSize:       chunkSize,
		nextChunkIndex:  0,
		dataShards:      dataShards,
		encryptOnUpload: encryptOnUpload,
		uploadMask:      uploadMask,
		erasureEncoder:  erasureEncoder,
		encscheme:       encscheme,
		hasher:          hasher,
		hasherDataChan:  make(chan []byte, 3*chunkNumber),
		hasherWG:        sync.WaitGroup{},
		chunkNumber:     int64(chunkNumber),
	}

	if r.encryptOnUpload {
		// reserve room for the padding and encryption header added to each fragment
		r.chunkHeaderSize = EncryptedDataPaddingSize + EncryptionHeaderSize
		r.chunkDataSize = chunkSize - r.chunkHeaderSize
	} else {
		r.chunkDataSize = chunkSize
	}

	r.chunkDataSizePerRead = r.chunkDataSize * int64(dataShards)
	r.totalChunkDataSizePerRead = r.chunkDataSize * int64(dataShards+parityShards)
	if CurrentMode == UploadModeHigh {
		// hash asynchronously; Next feeds data through hasherDataChan
		r.hasherWG.Add(1)
		go r.hashData()
	}
	return r, nil
}
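
// A minimal construction sketch (illustrative only; fileSize, uploadMask,
// encScheme and hasher are assumed to come from the surrounding upload setup):
//
//	enc, err := reedsolomon.New(dataShards, parityShards)
//	if err != nil {
//		return err
//	}
//	reader, err := createChunkReader(file, fileSize, 64*1024,
//		dataShards, parityShards, false, uploadMask, enc, encScheme, hasher, 1)
//	if err != nil {
//		return err
//	}
//	defer reader.Close()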

// ChunkData data of a chunk
type ChunkData struct {
	// Index index of this chunk
	Index int
	// IsFinal whether this is the last chunk
	IsFinal bool

	// ReadSize total size read from the original reader (un-encoded, un-encrypted)
	ReadSize int64
	// FragmentSize fragment size for a blobber (un-encrypted)
	FragmentSize int64
	// Fragments data shards for blobbers
	Fragments [][]byte
}
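
// For example (illustrative numbers): with dataShards = 4 and encryption off
// (chunkHeaderSize = 0), a chunk that reads 65536 bytes from the source yields
// FragmentSize = ceil(65536/4) + 0 = 16384, and the erasure encoder adds
// parity fragments of the same size.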

// func (r *chunkReader) GetChunkDataSize() int64 {
// 	if r == nil {
// 		return 0
// 	}
// 	return r.chunkDataSize
// }

// Next reads, encodes and encrypts the next chunk for blobbers
func (r *chunkedUploadChunkReader) Next() (*ChunkData, error) {

	if r == nil {
		return nil, errors.Throw(constants.ErrInvalidParameter, "r")
	}

	if r.fileShardsDataBuffer == nil {
		// lazily grab a pooled buffer big enough for chunkNumber chunks,
		// including room for parity shards
		totalDataSize := r.totalChunkDataSizePerRead * r.chunkNumber
		readSize := r.chunkDataSizePerRead * r.chunkNumber
		if r.size > 0 && readSize > r.size {
			chunkNum := (r.size + r.chunkDataSizePerRead - 1) / r.chunkDataSizePerRead
			totalDataSize = r.totalChunkDataSizePerRead * chunkNum
		}
		buf := uploadPool.Get()
		if cap(buf.B) < int(totalDataSize) {
			logger.Logger.Debug("creating buffer, totalDataSize: ", totalDataSize)
			buf.B = make([]byte, 0, totalDataSize)
		} else {
			logger.Logger.Debug("reusing buffer with size: ", cap(buf.B), " totalDataSize: ", totalDataSize, " len: ", len(buf.B))
		}
		r.fileShardsDataBuffer = buf
	}

	chunk := &ChunkData{
		Index:   r.nextChunkIndex,
		IsFinal: false,

		ReadSize:     0,
		FragmentSize: 0,
	}
	// slice this chunk's data window; the extra capacity up to
	// totalChunkDataSizePerRead leaves room for the parity shards
	chunkBytes := r.fileShardsDataBuffer.B[r.offset : r.offset+r.chunkDataSizePerRead : r.offset+r.totalChunkDataSizePerRead]
	var (
		readLen int
		err     error
	)
	// read until the window is full or the source is drained
	for readLen < len(chunkBytes) && err == nil {
		var nn int
		nn, err = r.fileReader.Read(chunkBytes[readLen:])
		readLen += nn
	}
	if err != nil {
		if !errors.Is(err, io.EOF) {
			return nil, err
		}

		//all bytes are read
		chunk.IsFinal = true
	}

	if readLen == 0 {
		chunk.IsFinal = true
		return chunk, nil
	}

	chunk.FragmentSize = int64(math.Ceil(float64(readLen)/float64(r.dataShards))) + r.chunkHeaderSize
	if readLen < int(r.chunkDataSizePerRead) {
		chunkBytes = chunkBytes[:readLen]
		chunk.IsFinal = true
	}

	chunk.ReadSize = int64(readLen)
	r.readSize += chunk.ReadSize
	if r.size > 0 {
		if r.readSize >= r.size {
			chunk.IsFinal = true
		}
	}

	if r.hasherError != nil {
		return chunk, r.hasherError
	}

	if CurrentMode == UploadModeHigh {
		r.hasherDataChan <- chunkBytes
	} else {
		_ = r.hasher.WriteToFile(chunkBytes)
	}

	fragments, err := r.erasureEncoder.Split(chunkBytes)
	if err != nil {
		return nil, err
	}

	err = r.erasureEncoder.Encode(fragments)
	if err != nil {
		return nil, err
	}
	var pos uint64
	if r.encryptOnUpload {
		// iterate over the set bits of uploadMask, clearing each bit once
		// its fragment has been encrypted
		for i := r.uploadMask; !i.Equals64(0); i = i.And(zboxutil.NewUint128(1).Lsh(pos).Not()) {
			pos = uint64(i.TrailingZeros())
			encMsg, err := r.encscheme.Encrypt(fragments[pos])
			if err != nil {
				return nil, err
			}
			// prepend the encryption header (checksums) to the encrypted data
			fragments[pos] = make([]byte, len(encMsg.EncryptedData)+EncryptionHeaderSize)
			n := copy(fragments[pos], encMsg.MessageChecksum+encMsg.OverallChecksum)
			copy(fragments[pos][n:], encMsg.EncryptedData)
		}
	}

	chunk.Fragments = fragments
	r.nextChunkIndex++
	r.offset += r.totalChunkDataSizePerRead
	return chunk, nil
}
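
// With encryption on, each fragment handed to a blobber is laid out as
// (sketch, matching the copy calls above):
//
//	[ MessageChecksum + OverallChecksum | EncryptedData ]
//	  <------ EncryptionHeaderSize ---->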

// Read reads, encodes and encrypts all bytes in buf
func (r *chunkedUploadChunkReader) Read(buf []byte) ([][]byte, error) {

	if buf == nil {
		return nil, nil
	}

	if r == nil {
		return nil, errors.Throw(constants.ErrInvalidParameter, "r")
	}

	fragments, err := r.erasureEncoder.Split(buf)
	if err != nil {
		return nil, err
	}

	err = r.erasureEncoder.Encode(fragments)
	if err != nil {
		return nil, err
	}

	var pos uint64
	if r.encryptOnUpload {
		// same mask iteration as in Next: visit each set bit once
		for i := r.uploadMask; !i.Equals64(0); i = i.And(zboxutil.NewUint128(1).Lsh(pos).Not()) {
			pos = uint64(i.TrailingZeros())
			encMsg, err := r.encscheme.Encrypt(fragments[pos])
			if err != nil {
				return nil, err
			}
			fragments[pos] = make([]byte, len(encMsg.EncryptedData)+EncryptionHeaderSize)
			n := copy(fragments[pos], encMsg.MessageChecksum+encMsg.OverallChecksum)
			copy(fragments[pos][n:], encMsg.EncryptedData)
		}
	}

	return fragments, nil
}
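
// A minimal usage sketch for Read (illustrative only; buf is assumed to hold
// one chunk's worth of bytes that the caller has already buffered):
//
//	fragments, err := reader.Read(buf)
//	if err != nil {
//		return err
//	}
//	// fragments[i] is the shard destined for blobber i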

// Reset resets the offset into the pooled shard buffer so it can be reused
func (r *chunkedUploadChunkReader) Reset() {
	r.offset = 0
}

// Close closes the hasher data channel once, waits for the hashing goroutine
// to finish and returns the shard buffer to the pool
func (r *chunkedUploadChunkReader) Close() {
	r.closeOnce.Do(func() {
		close(r.hasherDataChan)
		r.hasherWG.Wait()
		uploadPool.Put(r.fileShardsDataBuffer)
	})
}

// GetFileHash closes the hasher pipeline and returns the final file hash
func (r *chunkedUploadChunkReader) GetFileHash() (string, error) {
	r.Close()
	if r.hasherError != nil {
		return "", r.hasherError
	}
	return r.hasher.GetFileHash()
}

// hashData drains hasherDataChan, feeding each chunk to the hasher; the first
// write error is recorded in hasherError and stops the goroutine
func (r *chunkedUploadChunkReader) hashData() {
	defer r.hasherWG.Done()
	for data := range r.hasherDataChan {
		err := r.hasher.WriteToFile(data)
		if err != nil {
			r.hasherError = err
			return
		}
	}
}
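
// Design note: in UploadModeHigh, hashing runs as a single consumer goroutine
// fed by Next through hasherDataChan, so hashing overlaps with erasure coding
// and encryption on the producer side; Close shuts the pipeline down by
// closing the channel and waiting for hashData to drain it.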