github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/blobs/stream.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package blobs
    12  
    13  import (
    14  	"io"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/blobs/blobspb"
    17  )
    18  
    19  // Within the blob service, streaming is used in two functions:
    20  //   - GetStream, streaming from server to client
    21  //   - PutStream, streaming from client to server
    22  // These functions are used to read or write files on a remote node.
// The io.ReadCloser we implement here is used on the _receiver's_
    24  // side, to read from either Blob_GetStreamClient or Blob_PutStreamServer.
    25  // The function streamContent() is used on the _sender's_ side to split
    26  // the content and send it using Blob_GetStreamServer or Blob_PutStreamClient.
    27  
    28  // chunkSize was decided to be 128K after running an experiment benchmarking
    29  // ReadFile and WriteFile. It seems like the benefits of streaming do not appear
    30  // until files of 1 MB or larger, and for those files, 128K chunks are optimal.
    31  // For ReadFile, larger chunks are more efficient but the gains are not as significant
    32  // past 128K. For WriteFile, 128K chunks perform best, and past that, performance
    33  // starts decreasing.
    34  var chunkSize = 128 * 1 << 10
    35  
    36  // blobStreamReader implements a ReadCloser which receives
    37  // gRPC streaming messages.
    38  var _ io.ReadCloser = &blobStreamReader{}
    39  
    40  type streamReceiver interface {
    41  	SendAndClose(*blobspb.StreamResponse) error
    42  	Recv() (*blobspb.StreamChunk, error)
    43  }
    44  
    45  // nopSendAndClose creates a GetStreamClient that has a nop SendAndClose function.
    46  // This is needed as Blob_GetStreamClient does not have a Close() function, whereas
    47  // the other sender, Blob_PutStreamServer, does.
    48  type nopSendAndClose struct {
    49  	blobspb.Blob_GetStreamClient
    50  }
    51  
    52  func (*nopSendAndClose) SendAndClose(*blobspb.StreamResponse) error {
    53  	return nil
    54  }
    55  
    56  // newGetStreamReader creates an io.ReadCloser that uses gRPC's streaming API
    57  // to read chunks of data.
    58  func newGetStreamReader(client blobspb.Blob_GetStreamClient) io.ReadCloser {
    59  	return &blobStreamReader{
    60  		stream: &nopSendAndClose{client},
    61  	}
    62  }
    63  
    64  // newPutStreamReader creates an io.ReadCloser that uses gRPC's streaming API
    65  // to read chunks of data.
    66  func newPutStreamReader(client blobspb.Blob_PutStreamServer) io.ReadCloser {
    67  	return &blobStreamReader{stream: client}
    68  }
    69  
// blobStreamReader adapts a streamReceiver to io.ReadCloser. Read copies
// received chunk payloads into the caller's buffer, and Close calls
// SendAndClose on the stream.
type blobStreamReader struct {
	// lastPayload is the payload of a received chunk that did not fit
	// entirely into the buffer of an earlier Read. It is nil when nothing
	// is pending.
	lastPayload []byte
	// lastOffset is the index in lastPayload of the first byte that has
	// not yet been handed to a caller.
	lastOffset int
	// stream is where chunks are received from.
	stream streamReceiver
	// EOFReached is set once stream.Recv has returned io.EOF; from then on
	// Read returns io.EOF without touching the stream.
	EOFReached bool
}
    76  
    77  func (r *blobStreamReader) Read(out []byte) (int, error) {
    78  	if r.EOFReached {
    79  		return 0, io.EOF
    80  	}
    81  
    82  	offset := 0
    83  	// Use the last payload.
    84  	if r.lastPayload != nil {
    85  		offset = len(r.lastPayload) - r.lastOffset
    86  		if len(out) < offset {
    87  			copy(out, r.lastPayload[r.lastOffset:])
    88  			r.lastOffset += len(out)
    89  			return len(out), nil
    90  		}
    91  		copy(out[:offset], r.lastPayload[r.lastOffset:])
    92  		r.lastPayload = nil
    93  	}
    94  	for offset < len(out) {
    95  		chunk, err := r.stream.Recv()
    96  		if err == io.EOF {
    97  			r.EOFReached = true
    98  			break
    99  		}
   100  		if err != nil {
   101  			return offset, err
   102  		}
   103  		var lenToWrite int
   104  		if len(out)-offset >= len(chunk.Payload) {
   105  			lenToWrite = len(chunk.Payload)
   106  		} else {
   107  			lenToWrite = len(out) - offset
   108  			// Need to cache payload.
   109  			r.lastPayload = chunk.Payload
   110  			r.lastOffset = lenToWrite
   111  		}
   112  		copy(out[offset:offset+lenToWrite], chunk.Payload[:lenToWrite])
   113  		offset += lenToWrite
   114  	}
   115  	return offset, nil
   116  }
   117  
   118  func (r *blobStreamReader) Close() error {
   119  	return r.stream.SendAndClose(&blobspb.StreamResponse{})
   120  }
   121  
// streamSender is the single stream operation streamContent needs from the
// sending side of a stream.
type streamSender interface {
	// Send transmits one chunk and reports any failure to do so.
	Send(*blobspb.StreamChunk) error
}
   125  
   126  // streamContent splits the content into chunks, of size `chunkSize`,
   127  // and streams those chunks to sender.
   128  // Note: This does not close the stream.
   129  func streamContent(sender streamSender, content io.Reader) error {
   130  	payload := make([]byte, chunkSize)
   131  	var chunk blobspb.StreamChunk
   132  	for {
   133  		n, err := content.Read(payload)
   134  		if n > 0 {
   135  			chunk.Payload = payload[:n]
   136  			err = sender.Send(&chunk)
   137  		}
   138  		if err == io.EOF {
   139  			return nil
   140  		}
   141  		if err != nil {
   142  			return err
   143  		}
   144  	}
   145  }