github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/message.go (about) 1 // Copyright (c) 2015-2021 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package s3select 19 20 import ( 21 "bytes" 22 "encoding/binary" 23 "fmt" 24 "hash/crc32" 25 "net/http" 26 "strconv" 27 "sync/atomic" 28 "time" 29 ) 30 31 // A message is in the format specified in 32 // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-frame-overview.png 33 // hence the calculation is made accordingly. 
func totalByteLength(headerLength, payloadLength int) int {
	// Prelude: total byte-length (4) + headers byte-length (4) + prelude CRC (4).
	// Trailer: message CRC (4).
	return 4 + 4 + 4 + headerLength + payloadLength + 4
}

// genMessage frames the already-encoded header block and the payload into a
// single message:
//
//	total byte-length   (4 bytes, big endian)
//	headers byte-length (4 bytes, big endian)
//	prelude CRC         (4 bytes, CRC32/IEEE of the 8 bytes above)
//	headers
//	payload             (may be nil or empty)
//	message CRC         (4 bytes, CRC32/IEEE of everything above)
//
// The returned slice is freshly allocated with exactly the message length, so
// neither header nor payload is retained or modified.
func genMessage(header, payload []byte) []byte {
	headerLength := len(header)
	payloadLength := len(payload)
	totalLength := totalByteLength(headerLength, payloadLength)

	// The final size is known up front, so allocate once instead of growing
	// a buffer (payloads can be close to 128KiB).
	message := make([]byte, totalLength)

	binary.BigEndian.PutUint32(message[0:4], uint32(totalLength))
	binary.BigEndian.PutUint32(message[4:8], uint32(headerLength))
	binary.BigEndian.PutUint32(message[8:12], crc32.ChecksumIEEE(message[:8]))

	n := 12
	n += copy(message[n:], header)
	n += copy(message[n:], payload) // copying a nil payload is a no-op.

	// The message CRC covers prelude, prelude CRC, headers and payload.
	binary.BigEndian.PutUint32(message[n:], crc32.ChecksumIEEE(message[:n]))

	return message
}

// recordsHeader is the pre-encoded header block of a Records message. Each
// header is laid out as: name length (1 byte), name, value type (7),
// value length (2 bytes, big endian), value.
//
// Refer genRecordsHeader().
var recordsHeader = []byte{
	13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't',
	13, ':', 'c', 'o', 'n', 't', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 24, 'a', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', '/', 'o', 'c', 't', 'e', 't', '-', 's', 't', 'r', 'e', 'a', 'm',
	11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 7, 'R', 'e', 'c', 'o', 'r', 'd', 's',
}

const (
	// Chosen for compatibility with AWS JAVA SDK
	// It has a buffer size of 128K:
	// https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/internal/eventstreaming/MessageDecoder.java#L26
	// but we must make sure there is always space to add 256 bytes:
	// https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/model/SelectObjectContentEventStream.java#L197
	maxRecordMessageLength = (128 << 10) - 256
)

// bufLength is the largest payload that still fits into a Records message of
// maxRecordMessageLength bytes.
var bufLength = payloadLenForMsgLen(maxRecordMessageLength)

// newRecordsMessage - creates new Records Message which can contain a single record, partial records,
// or multiple records. Depending on the size of the result, a response can contain one or more of these messages.
//
// Header specification
// Records messages contain three headers, as follows:
// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-record.png
//
// Payload specification
// Records message payloads can contain a single record, partial records, or multiple records.
func newRecordsMessage(payload []byte) []byte {
	return genMessage(recordsHeader, payload)
}

// payloadLenForMsgLen computes the length of the payload in a record
// message given the total length of the message. It is the inverse of
// totalByteLength for the fixed records header.
func payloadLenForMsgLen(messageLength int) int {
	headerLength := len(recordsHeader)
	payloadLength := messageLength - 4 - 4 - 4 - headerLength - 4
	return payloadLength
}

// continuationMessage - S3 periodically sends this message to keep the TCP connection open.
// These messages appear in responses at random. The client must detect the message type and process accordingly.
//
// Header specification:
// Continuation messages contain two headers, as follows:
// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-cont.png
//
// Payload specification:
// Continuation messages have no payload.
var continuationMessage = []byte{
	0, 0, 0, 57, // total byte-length.
	0, 0, 0, 41, // headers byte-length.
	139, 161, 157, 242, // prelude crc.
	13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', // headers.
	11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 4, 'C', 'o', 'n', 't', // headers.
	156, 134, 74, 13, // message crc.
}

// Refer genProgressHeader().
116 var progressHeader = []byte{ 117 13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', 118 13, ':', 'c', 'o', 'n', 't', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 8, 't', 'e', 'x', 't', '/', 'x', 'm', 'l', 119 11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 8, 'P', 'r', 'o', 'g', 'r', 'e', 's', 's', 120 } 121 122 // newProgressMessage - creates new Progress Message. S3 periodically sends this message, if requested. 123 // It contains information about the progress of a query that has started but has not yet completed. 124 // 125 // Header specification: 126 // Progress messages contain three headers, as follows: 127 // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-progress.png 128 // 129 // Payload specification: 130 // Progress message payload is an XML document containing information about the progress of a request. 131 // - BytesScanned => Number of bytes that have been processed before being uncompressed (if the file is compressed). 132 // - BytesProcessed => Number of bytes that have been processed after being uncompressed (if the file is compressed). 133 // - BytesReturned => Current number of bytes of records payload data returned by S3. 134 // 135 // For uncompressed files, BytesScanned and BytesProcessed are equal. 
136 // 137 // Example: 138 // 139 // <?xml version="1.0" encoding="UTF-8"?> 140 // <Progress> 141 // 142 // <BytesScanned>512</BytesScanned> 143 // <BytesProcessed>1024</BytesProcessed> 144 // <BytesReturned>1024</BytesReturned> 145 // 146 // </Progress> 147 func newProgressMessage(bytesScanned, bytesProcessed, bytesReturned int64) []byte { 148 payload := []byte(`<?xml version="1.0" encoding="UTF-8"?><Progress><BytesScanned>` + 149 strconv.FormatInt(bytesScanned, 10) + `</BytesScanned><BytesProcessed>` + 150 strconv.FormatInt(bytesProcessed, 10) + `</BytesProcessed><BytesReturned>` + 151 strconv.FormatInt(bytesReturned, 10) + `</BytesReturned></Stats>`) 152 return genMessage(progressHeader, payload) 153 } 154 155 // Refer genStatsHeader(). 156 var statsHeader = []byte{ 157 13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', 158 13, ':', 'c', 'o', 'n', 't', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 8, 't', 'e', 'x', 't', '/', 'x', 'm', 'l', 159 11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'S', 't', 'a', 't', 's', 160 } 161 162 // newStatsMessage - creates new Stats Message. S3 sends this message at the end of the request. 163 // It contains statistics about the query. 164 // 165 // Header specification: 166 // Stats messages contain three headers, as follows: 167 // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-stats.png 168 // 169 // Payload specification: 170 // Stats message payload is an XML document containing information about a request's stats when processing is complete. 171 // - BytesScanned => Number of bytes that have been processed before being uncompressed (if the file is compressed). 172 // - BytesProcessed => Number of bytes that have been processed after being uncompressed (if the file is compressed). 173 // - BytesReturned => Total number of bytes of records payload data returned by S3. 
174 // 175 // For uncompressed files, BytesScanned and BytesProcessed are equal. 176 // 177 // Example: 178 // 179 // <?xml version="1.0" encoding="UTF-8"?> 180 // <Stats> 181 // 182 // <BytesScanned>512</BytesScanned> 183 // <BytesProcessed>1024</BytesProcessed> 184 // <BytesReturned>1024</BytesReturned> 185 // 186 // </Stats> 187 func newStatsMessage(bytesScanned, bytesProcessed, bytesReturned int64) []byte { 188 payload := []byte(`<?xml version="1.0" encoding="UTF-8"?><Stats><BytesScanned>` + 189 strconv.FormatInt(bytesScanned, 10) + `</BytesScanned><BytesProcessed>` + 190 strconv.FormatInt(bytesProcessed, 10) + `</BytesProcessed><BytesReturned>` + 191 strconv.FormatInt(bytesReturned, 10) + `</BytesReturned></Stats>`) 192 return genMessage(statsHeader, payload) 193 } 194 195 // endMessage - indicates that the request is complete, and no more messages will be sent. 196 // You should not assume that the request is complete until the client receives an End message. 197 // 198 // Header specification: 199 // End messages contain two headers, as follows: 200 // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-end.png 201 // 202 // Payload specification: 203 // End messages have no payload. 204 var endMessage = []byte{ 205 0, 0, 0, 56, // total byte-length. 206 0, 0, 0, 40, // headers byte-length. 207 193, 198, 132, 212, // prelude crc. 208 13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', // headers. 209 11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 3, 'E', 'n', 'd', // headers. 210 207, 151, 211, 146, // message crc. 211 } 212 213 // newErrorMessage - creates new Request Level Error Message. S3 sends this message if the request failed for any reason. 214 // It contains the error code and error message for the failure. If S3 sends a RequestLevelError message, 215 // it doesn't send an End message. 
216 // 217 // Header specification: 218 // Request-level error messages contain three headers, as follows: 219 // https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-error.png 220 // 221 // Payload specification: 222 // Request-level error messages have no payload. 223 func newErrorMessage(errorCode, errorMessage []byte) []byte { 224 buf := new(bytes.Buffer) 225 226 buf.Write([]byte{13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'r', 'r', 'o', 'r'}) 227 228 buf.Write([]byte{14, ':', 'e', 'r', 'r', 'o', 'r', '-', 'm', 'e', 's', 's', 'a', 'g', 'e', 7}) 229 binary.Write(buf, binary.BigEndian, uint16(len(errorMessage))) 230 buf.Write(errorMessage) 231 232 buf.Write([]byte{11, ':', 'e', 'r', 'r', 'o', 'r', '-', 'c', 'o', 'd', 'e', 7}) 233 binary.Write(buf, binary.BigEndian, uint16(len(errorCode))) 234 buf.Write(errorCode) 235 236 return genMessage(buf.Bytes(), nil) 237 } 238 239 // NewErrorMessage - creates new Request Level Error Message specified in 240 // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html. 
241 func NewErrorMessage(errorCode, errorMessage string) []byte { 242 return newErrorMessage([]byte(errorCode), []byte(errorMessage)) 243 } 244 245 type messageWriter struct { 246 writer http.ResponseWriter 247 getProgressFunc func() (int64, int64) 248 bytesReturned int64 249 250 payloadBuffer []byte 251 payloadBufferIndex int 252 payloadCh chan *bytes.Buffer 253 254 finBytesScanned, finBytesProcessed int64 255 256 errCh chan []byte 257 doneCh chan struct{} 258 } 259 260 func (writer *messageWriter) write(data []byte) bool { 261 if _, err := writer.writer.Write(data); err != nil { 262 return false 263 } 264 265 writer.writer.(http.Flusher).Flush() 266 return true 267 } 268 269 func (writer *messageWriter) start() { 270 keepAliveTicker := time.NewTicker(1 * time.Second) 271 var progressTicker *time.Ticker 272 var progressTickerC <-chan time.Time 273 if writer.getProgressFunc != nil { 274 progressTicker = time.NewTicker(1 * time.Minute) 275 progressTickerC = progressTicker.C 276 } 277 recordStagingTicker := time.NewTicker(500 * time.Millisecond) 278 279 // Exit conditions: 280 // 281 // 1. If a writer.write() returns false, select loop below exits and 282 // closes `doneCh` to indicate to caller to also exit. 283 // 284 // 2. If caller (Evaluate()) has an error, it sends an error 285 // message and waits for this go-routine to quit in 286 // FinishWithError() 287 // 288 // 3. 
If caller is done, it waits for this go-routine to exit 289 // in Finish() 290 291 quitFlag := false 292 for !quitFlag { 293 select { 294 case data := <-writer.errCh: 295 quitFlag = true 296 // Flush collected records before sending error message 297 if !writer.flushRecords() { 298 break 299 } 300 writer.write(data) 301 302 case payload, ok := <-writer.payloadCh: 303 if !ok { 304 // payloadCh is closed by caller to 305 // indicate finish with success 306 quitFlag = true 307 308 if !writer.flushRecords() { 309 break 310 } 311 // Write Stats message, then End message 312 bytesReturned := atomic.LoadInt64(&writer.bytesReturned) 313 if !writer.write(newStatsMessage(writer.finBytesScanned, writer.finBytesProcessed, bytesReturned)) { 314 break 315 } 316 writer.write(endMessage) 317 } else { 318 for payload.Len() > 0 { 319 copiedLen := copy(writer.payloadBuffer[writer.payloadBufferIndex:], payload.Bytes()) 320 writer.payloadBufferIndex += copiedLen 321 payload.Next(copiedLen) 322 323 // If buffer is filled, flush it now! 324 freeSpace := bufLength - writer.payloadBufferIndex 325 if freeSpace == 0 { 326 if !writer.flushRecords() { 327 quitFlag = true 328 break 329 } 330 } 331 } 332 333 bufPool.Put(payload) 334 } 335 336 case <-recordStagingTicker.C: 337 if !writer.flushRecords() { 338 quitFlag = true 339 } 340 341 case <-keepAliveTicker.C: 342 if !writer.write(continuationMessage) { 343 quitFlag = true 344 } 345 346 case <-progressTickerC: 347 bytesScanned, bytesProcessed := writer.getProgressFunc() 348 bytesReturned := atomic.LoadInt64(&writer.bytesReturned) 349 if !writer.write(newProgressMessage(bytesScanned, bytesProcessed, bytesReturned)) { 350 quitFlag = true 351 } 352 } 353 } 354 close(writer.doneCh) 355 356 recordStagingTicker.Stop() 357 keepAliveTicker.Stop() 358 if progressTicker != nil { 359 progressTicker.Stop() 360 } 361 362 // Whatever drain the payloadCh to prevent from memory leaking. 
363 for len(writer.payloadCh) > 0 { 364 payload := <-writer.payloadCh 365 bufPool.Put(payload) 366 } 367 } 368 369 // Sends a single whole record. 370 func (writer *messageWriter) SendRecord(payload *bytes.Buffer) error { 371 select { 372 case writer.payloadCh <- payload: 373 return nil 374 case <-writer.doneCh: 375 return fmt.Errorf("messageWriter is done") 376 } 377 } 378 379 func (writer *messageWriter) flushRecords() bool { 380 if writer.payloadBufferIndex == 0 { 381 return true 382 } 383 result := writer.write(newRecordsMessage(writer.payloadBuffer[0:writer.payloadBufferIndex])) 384 if result { 385 atomic.AddInt64(&writer.bytesReturned, int64(writer.payloadBufferIndex)) 386 writer.payloadBufferIndex = 0 387 } 388 return result 389 } 390 391 // Finish is the last call to the message writer - it sends any 392 // remaining record payload, then sends statistics and finally the end 393 // message. 394 func (writer *messageWriter) Finish(bytesScanned, bytesProcessed int64) error { 395 select { 396 case <-writer.doneCh: 397 return fmt.Errorf("messageWriter is done") 398 default: 399 writer.finBytesScanned = bytesScanned 400 writer.finBytesProcessed = bytesProcessed 401 close(writer.payloadCh) 402 // Wait until the `start` go-routine is done. 403 <-writer.doneCh 404 return nil 405 } 406 } 407 408 func (writer *messageWriter) FinishWithError(errorCode, errorMessage string) error { 409 select { 410 case <-writer.doneCh: 411 return fmt.Errorf("messageWriter is done") 412 case writer.errCh <- newErrorMessage([]byte(errorCode), []byte(errorMessage)): 413 // Wait until the `start` go-routine is done. 
414 <-writer.doneCh 415 return nil 416 } 417 } 418 419 // newMessageWriter creates a message writer that writes to the HTTP 420 // response writer 421 func newMessageWriter(w http.ResponseWriter, getProgressFunc func() (bytesScanned, bytesProcessed int64)) *messageWriter { 422 writer := &messageWriter{ 423 writer: w, 424 getProgressFunc: getProgressFunc, 425 426 payloadBuffer: make([]byte, bufLength), 427 payloadCh: make(chan *bytes.Buffer, 1), 428 429 errCh: make(chan []byte), 430 doneCh: make(chan struct{}), 431 } 432 go writer.start() 433 return writer 434 }