storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/message.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2019 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package s3select
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"fmt"
    23  	"hash/crc32"
    24  	"net/http"
    25  	"strconv"
    26  	"sync/atomic"
    27  	"time"
    28  )
    29  
    30  // A message is in the format specified in
    31  // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-frame-overview.png
    32  // hence the calculation is made accordingly.
    33  func totalByteLength(headerLength, payloadLength int) int {
    34  	return 4 + 4 + 4 + headerLength + payloadLength + 4
    35  }
    36  
    37  func genMessage(header, payload []byte) []byte {
    38  	headerLength := len(header)
    39  	payloadLength := len(payload)
    40  	totalLength := totalByteLength(headerLength, payloadLength)
    41  
    42  	buf := new(bytes.Buffer)
    43  	binary.Write(buf, binary.BigEndian, uint32(totalLength))
    44  	binary.Write(buf, binary.BigEndian, uint32(headerLength))
    45  	prelude := buf.Bytes()
    46  	binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(prelude))
    47  	buf.Write(header)
    48  	if payload != nil {
    49  		buf.Write(payload)
    50  	}
    51  	message := buf.Bytes()
    52  	binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(message))
    53  
    54  	return buf.Bytes()
    55  }
    56  
    57  // Refer genRecordsHeader().
    58  var recordsHeader = []byte{
    59  	13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't',
    60  	13, ':', 'c', 'o', 'n', 't', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 24, 'a', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', '/', 'o', 'c', 't', 'e', 't', '-', 's', 't', 'r', 'e', 'a', 'm',
    61  	11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 7, 'R', 'e', 'c', 'o', 'r', 'd', 's',
    62  }
    63  
    64  const (
    65  	// Chosen for compatibility with AWS JAVA SDK
    66  	// It has a a buffer size of 128K:
    67  	// https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/internal/eventstreaming/MessageDecoder.java#L26
    68  	// but we must make sure there is always space to add 256 bytes:
    69  	// https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/model/SelectObjectContentEventStream.java#L197
    70  	maxRecordMessageLength = (128 << 10) - 256
    71  )
    72  
    73  var (
    74  	bufLength = payloadLenForMsgLen(maxRecordMessageLength)
    75  )
    76  
    77  // newRecordsMessage - creates new Records Message which can contain a single record, partial records,
    78  // or multiple records. Depending on the size of the result, a response can contain one or more of these messages.
    79  //
    80  // Header specification
    81  // Records messages contain three headers, as follows:
    82  // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-record.png
    83  //
    84  // Payload specification
    85  // Records message payloads can contain a single record, partial records, or multiple records.
    86  func newRecordsMessage(payload []byte) []byte {
    87  	return genMessage(recordsHeader, payload)
    88  }
    89  
    90  // payloadLenForMsgLen computes the length of the payload in a record
    91  // message given the total length of the message.
    92  func payloadLenForMsgLen(messageLength int) int {
    93  	headerLength := len(recordsHeader)
    94  	payloadLength := messageLength - 4 - 4 - 4 - headerLength - 4
    95  	return payloadLength
    96  }
    97  
    98  // continuationMessage - S3 periodically sends this message to keep the TCP connection open.
    99  // These messages appear in responses at random. The client must detect the message type and process accordingly.
   100  //
   101  // Header specification:
   102  // Continuation messages contain two headers, as follows:
   103  // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-cont.png
   104  //
   105  // Payload specification:
   106  // Continuation messages have no payload.
   107  var continuationMessage = []byte{
   108  	0, 0, 0, 57, // total byte-length.
   109  	0, 0, 0, 41, // headers byte-length.
   110  	139, 161, 157, 242, // prelude crc.
   111  	13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', // headers.
   112  	11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 4, 'C', 'o', 'n', 't', // headers.
   113  	156, 134, 74, 13, // message crc.
   114  }
   115  
   116  // Refer genProgressHeader().
   117  var progressHeader = []byte{
   118  	13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't',
   119  	13, ':', 'c', 'o', 'n', 't', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 8, 't', 'e', 'x', 't', '/', 'x', 'm', 'l',
   120  	11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 8, 'P', 'r', 'o', 'g', 'r', 'e', 's', 's',
   121  }
   122  
   123  // newProgressMessage - creates new Progress Message. S3 periodically sends this message, if requested.
   124  // It contains information about the progress of a query that has started but has not yet completed.
   125  //
   126  // Header specification:
   127  // Progress messages contain three headers, as follows:
   128  // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-progress.png
   129  //
   130  // Payload specification:
   131  // Progress message payload is an XML document containing information about the progress of a request.
   132  //   * BytesScanned => Number of bytes that have been processed before being uncompressed (if the file is compressed).
   133  //   * BytesProcessed => Number of bytes that have been processed after being uncompressed (if the file is compressed).
   134  //   * BytesReturned => Current number of bytes of records payload data returned by S3.
   135  //
   136  // For uncompressed files, BytesScanned and BytesProcessed are equal.
   137  //
   138  // Example:
   139  //
   140  // <?xml version="1.0" encoding="UTF-8"?>
   141  // <Progress>
   142  //   <BytesScanned>512</BytesScanned>
   143  //   <BytesProcessed>1024</BytesProcessed>
   144  //   <BytesReturned>1024</BytesReturned>
   145  // </Progress>
   146  //
   147  func newProgressMessage(bytesScanned, bytesProcessed, bytesReturned int64) []byte {
   148  	payload := []byte(`<?xml version="1.0" encoding="UTF-8"?><Progress><BytesScanned>` +
   149  		strconv.FormatInt(bytesScanned, 10) + `</BytesScanned><BytesProcessed>` +
   150  		strconv.FormatInt(bytesProcessed, 10) + `</BytesProcessed><BytesReturned>` +
   151  		strconv.FormatInt(bytesReturned, 10) + `</BytesReturned></Stats>`)
   152  	return genMessage(progressHeader, payload)
   153  }
   154  
   155  // Refer genStatsHeader().
   156  var statsHeader = []byte{
   157  	13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't',
   158  	13, ':', 'c', 'o', 'n', 't', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 8, 't', 'e', 'x', 't', '/', 'x', 'm', 'l',
   159  	11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'S', 't', 'a', 't', 's',
   160  }
   161  
   162  // newStatsMessage - creates new Stats Message. S3 sends this message at the end of the request.
   163  // It contains statistics about the query.
   164  //
   165  // Header specification:
   166  // Stats messages contain three headers, as follows:
   167  // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-stats.png
   168  //
   169  // Payload specification:
   170  // Stats message payload is an XML document containing information about a request's stats when processing is complete.
   171  //   * BytesScanned => Number of bytes that have been processed before being uncompressed (if the file is compressed).
   172  //   * BytesProcessed => Number of bytes that have been processed after being uncompressed (if the file is compressed).
   173  //   * BytesReturned => Total number of bytes of records payload data returned by S3.
   174  //
   175  // For uncompressed files, BytesScanned and BytesProcessed are equal.
   176  //
   177  // Example:
   178  //
   179  // <?xml version="1.0" encoding="UTF-8"?>
   180  // <Stats>
   181  //      <BytesScanned>512</BytesScanned>
   182  //      <BytesProcessed>1024</BytesProcessed>
   183  //      <BytesReturned>1024</BytesReturned>
   184  // </Stats>
   185  func newStatsMessage(bytesScanned, bytesProcessed, bytesReturned int64) []byte {
   186  	payload := []byte(`<?xml version="1.0" encoding="UTF-8"?><Stats><BytesScanned>` +
   187  		strconv.FormatInt(bytesScanned, 10) + `</BytesScanned><BytesProcessed>` +
   188  		strconv.FormatInt(bytesProcessed, 10) + `</BytesProcessed><BytesReturned>` +
   189  		strconv.FormatInt(bytesReturned, 10) + `</BytesReturned></Stats>`)
   190  	return genMessage(statsHeader, payload)
   191  }
   192  
   193  // endMessage - indicates that the request is complete, and no more messages will be sent.
   194  // You should not assume that the request is complete until the client receives an End message.
   195  //
   196  // Header specification:
   197  // End messages contain two headers, as follows:
   198  // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-end.png
   199  //
   200  // Payload specification:
   201  // End messages have no payload.
   202  var endMessage = []byte{
   203  	0, 0, 0, 56, // total byte-length.
   204  	0, 0, 0, 40, // headers byte-length.
   205  	193, 198, 132, 212, // prelude crc.
   206  	13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', // headers.
   207  	11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 3, 'E', 'n', 'd', // headers.
   208  	207, 151, 211, 146, // message crc.
   209  }
   210  
   211  // newErrorMessage - creates new Request Level Error Message. S3 sends this message if the request failed for any reason.
   212  // It contains the error code and error message for the failure. If S3 sends a RequestLevelError message,
   213  // it doesn't send an End message.
   214  //
   215  // Header specification:
   216  // Request-level error messages contain three headers, as follows:
   217  // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-error.png
   218  //
   219  // Payload specification:
   220  // Request-level error messages have no payload.
   221  func newErrorMessage(errorCode, errorMessage []byte) []byte {
   222  	buf := new(bytes.Buffer)
   223  
   224  	buf.Write([]byte{13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'r', 'r', 'o', 'r'})
   225  
   226  	buf.Write([]byte{14, ':', 'e', 'r', 'r', 'o', 'r', '-', 'm', 'e', 's', 's', 'a', 'g', 'e', 7})
   227  	binary.Write(buf, binary.BigEndian, uint16(len(errorMessage)))
   228  	buf.Write(errorMessage)
   229  
   230  	buf.Write([]byte{11, ':', 'e', 'r', 'r', 'o', 'r', '-', 'c', 'o', 'd', 'e', 7})
   231  	binary.Write(buf, binary.BigEndian, uint16(len(errorCode)))
   232  	buf.Write(errorCode)
   233  
   234  	return genMessage(buf.Bytes(), nil)
   235  }
   236  
   237  // NewErrorMessage - creates new Request Level Error Message specified in
   238  // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html.
   239  func NewErrorMessage(errorCode, errorMessage string) []byte {
   240  	return newErrorMessage([]byte(errorCode), []byte(errorMessage))
   241  }
   242  
   243  type messageWriter struct {
   244  	writer          http.ResponseWriter
   245  	getProgressFunc func() (int64, int64)
   246  	bytesReturned   int64
   247  
   248  	payloadBuffer      []byte
   249  	payloadBufferIndex int
   250  	payloadCh          chan *bytes.Buffer
   251  
   252  	finBytesScanned, finBytesProcessed int64
   253  
   254  	errCh  chan []byte
   255  	doneCh chan struct{}
   256  }
   257  
   258  func (writer *messageWriter) write(data []byte) bool {
   259  	if _, err := writer.writer.Write(data); err != nil {
   260  		return false
   261  	}
   262  
   263  	writer.writer.(http.Flusher).Flush()
   264  	return true
   265  }
   266  
// start is the message-writing loop; newMessageWriter runs it in its own
// goroutine. It multiplexes five event sources:
//
//   - errCh: an encoded error message to send after flushing staged records.
//   - payloadCh: records to stage, or channel closure signalling a successful
//     finish (staged records, then the Stats and End messages, are written).
//   - recordStagingTicker (500ms): flushes whatever records are staged.
//   - keepAliveTicker (1s): writes a continuation message.
//   - progressTicker (1min, only when getProgressFunc is set): writes a
//     Progress message.
//
// When the loop ends it closes doneCh, stops the tickers and returns any
// buffers still queued in payloadCh to bufPool.
func (writer *messageWriter) start() {
	keepAliveTicker := time.NewTicker(1 * time.Second)
	var progressTicker *time.Ticker
	// progressTickerC stays nil when progress reporting is disabled; a
	// receive from a nil channel never becomes ready, so that select case
	// is then never taken.
	var progressTickerC <-chan time.Time
	if writer.getProgressFunc != nil {
		progressTicker = time.NewTicker(1 * time.Minute)
		progressTickerC = progressTicker.C
	}
	recordStagingTicker := time.NewTicker(500 * time.Millisecond)

	// Exit conditions:
	//
	// 1. If a writer.write() returns false, the select loop below exits and
	// closes `doneCh` to tell the caller to exit as well.
	//
	// 2. If the caller (Evaluate()) has an error, it sends an error
	// message and waits for this go-routine to quit in
	// FinishWithError()
	//
	// 3. If the caller is done, it waits for this go-routine to exit
	// in Finish()

	quitFlag := false
	for !quitFlag {
		select {
		case data := <-writer.errCh:
			quitFlag = true
			// Flush collected records before sending the error message.
			// `break` leaves the select; the loop then ends via quitFlag.
			if !writer.flushRecords() {
				break
			}
			writer.write(data)

		case payload, ok := <-writer.payloadCh:
			if !ok {
				// payloadCh is closed by the caller to
				// indicate finish with success.
				quitFlag = true

				if !writer.flushRecords() {
					break
				}
				// Write Stats message, then End message.
				bytesReturned := atomic.LoadInt64(&writer.bytesReturned)
				if !writer.write(newStatsMessage(writer.finBytesScanned, writer.finBytesProcessed, bytesReturned)) {
					break
				}
				writer.write(endMessage)
			} else {
				// Copy the record into the staging buffer in as many
				// pieces as needed, flushing each time the buffer fills.
				for payload.Len() > 0 {
					copiedLen := copy(writer.payloadBuffer[writer.payloadBufferIndex:], payload.Bytes())
					writer.payloadBufferIndex += copiedLen
					payload.Next(copiedLen)

					// If buffer is filled, flush it now!
					freeSpace := bufLength - writer.payloadBufferIndex
					if freeSpace == 0 {
						if !writer.flushRecords() {
							quitFlag = true
							// Leaves only the copy loop; the buffer is
							// still returned to the pool below.
							break
						}
					}
				}

				// bufPool is defined outside this file; presumably the
				// pool the caller took the buffer from - confirm.
				bufPool.Put(payload)
			}

		case <-recordStagingTicker.C:
			// Periodically push out records that did not fill the buffer.
			if !writer.flushRecords() {
				quitFlag = true
			}

		case <-keepAliveTicker.C:
			if !writer.write(continuationMessage) {
				quitFlag = true
			}

		case <-progressTickerC:
			bytesScanned, bytesProcessed := writer.getProgressFunc()
			bytesReturned := atomic.LoadInt64(&writer.bytesReturned)
			if !writer.write(newProgressMessage(bytesScanned, bytesProcessed, bytesReturned)) {
				quitFlag = true
			}
		}
	}
	// Unblocks SendRecord, Finish and FinishWithError.
	close(writer.doneCh)

	recordStagingTicker.Stop()
	keepAliveTicker.Stop()
	if progressTicker != nil {
		progressTicker.Stop()
	}

	// Always drain payloadCh so queued buffers go back to the pool
	// instead of leaking.
	for len(writer.payloadCh) > 0 {
		payload := <-writer.payloadCh
		bufPool.Put(payload)
	}
}
   366  
   367  // Sends a single whole record.
   368  func (writer *messageWriter) SendRecord(payload *bytes.Buffer) error {
   369  	select {
   370  	case writer.payloadCh <- payload:
   371  		return nil
   372  	case <-writer.doneCh:
   373  		return fmt.Errorf("messageWriter is done")
   374  	}
   375  }
   376  
   377  func (writer *messageWriter) flushRecords() bool {
   378  	if writer.payloadBufferIndex == 0 {
   379  		return true
   380  	}
   381  	result := writer.write(newRecordsMessage(writer.payloadBuffer[0:writer.payloadBufferIndex]))
   382  	if result {
   383  		atomic.AddInt64(&writer.bytesReturned, int64(writer.payloadBufferIndex))
   384  		writer.payloadBufferIndex = 0
   385  	}
   386  	return result
   387  }
   388  
   389  // Finish is the last call to the message writer - it sends any
   390  // remaining record payload, then sends statistics and finally the end
   391  // message.
   392  func (writer *messageWriter) Finish(bytesScanned, bytesProcessed int64) error {
   393  	select {
   394  	case <-writer.doneCh:
   395  		return fmt.Errorf("messageWriter is done")
   396  	default:
   397  		writer.finBytesScanned = bytesScanned
   398  		writer.finBytesProcessed = bytesProcessed
   399  		close(writer.payloadCh)
   400  		// Wait until the `start` go-routine is done.
   401  		<-writer.doneCh
   402  		return nil
   403  	}
   404  }
   405  
   406  func (writer *messageWriter) FinishWithError(errorCode, errorMessage string) error {
   407  	select {
   408  	case <-writer.doneCh:
   409  		return fmt.Errorf("messageWriter is done")
   410  	case writer.errCh <- newErrorMessage([]byte(errorCode), []byte(errorMessage)):
   411  		// Wait until the `start` go-routine is done.
   412  		<-writer.doneCh
   413  		return nil
   414  	}
   415  }
   416  
   417  // newMessageWriter creates a message writer that writes to the HTTP
   418  // response writer
   419  func newMessageWriter(w http.ResponseWriter, getProgressFunc func() (bytesScanned, bytesProcessed int64)) *messageWriter {
   420  	writer := &messageWriter{
   421  		writer:          w,
   422  		getProgressFunc: getProgressFunc,
   423  
   424  		payloadBuffer: make([]byte, bufLength),
   425  		payloadCh:     make(chan *bytes.Buffer, 1),
   426  
   427  		errCh:  make(chan []byte),
   428  		doneCh: make(chan struct{}),
   429  	}
   430  	go writer.start()
   431  	return writer
   432  }