github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/select.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package s3select
    19  
    20  import (
    21  	"bufio"
    22  	"bytes"
    23  	"compress/bzip2"
    24  	"encoding/xml"
    25  	"errors"
    26  	"fmt"
    27  	"io"
    28  	"net/http"
    29  	"strings"
    30  	"sync"
    31  
    32  	"github.com/klauspost/compress/s2"
    33  	"github.com/klauspost/compress/zstd"
    34  	gzip "github.com/klauspost/pgzip"
    35  	"github.com/minio/minio/internal/config"
    36  	xioutil "github.com/minio/minio/internal/ioutil"
    37  	"github.com/minio/minio/internal/s3select/csv"
    38  	"github.com/minio/minio/internal/s3select/json"
    39  	"github.com/minio/minio/internal/s3select/parquet"
    40  	"github.com/minio/minio/internal/s3select/simdj"
    41  	"github.com/minio/minio/internal/s3select/sql"
    42  	"github.com/minio/pkg/v2/env"
    43  	"github.com/minio/simdjson-go"
    44  	"github.com/pierrec/lz4"
    45  )
    46  
    47  type recordReader interface {
    48  	// Read a record.
    49  	// dst is optional but will be used if valid.
    50  	Read(dst sql.Record) (sql.Record, error)
    51  	Close() error
    52  }
    53  
    54  const (
    55  	csvFormat     = "csv"
    56  	jsonFormat    = "json"
    57  	parquetFormat = "parquet"
    58  )
    59  
    60  // CompressionType - represents value inside <CompressionType/> in request XML.
    61  type CompressionType string
    62  
    63  const (
    64  	noneType  CompressionType = "none"
    65  	gzipType  CompressionType = "GZIP"
    66  	bzip2Type CompressionType = "BZIP2"
    67  
    68  	zstdType   CompressionType = "ZSTD"
    69  	lz4Type    CompressionType = "LZ4"
    70  	s2Type     CompressionType = "S2"
    71  	snappyType CompressionType = "SNAPPY"
    72  )
    73  
    74  const (
    75  	maxRecordSize = 1 << 20 // 1 MiB
    76  )
    77  
    78  var parquetSupport bool
    79  
    80  func init() {
    81  	parquetSupport = env.Get("MINIO_API_SELECT_PARQUET", config.EnableOff) == config.EnableOn
    82  }
    83  
    84  var bufPool = sync.Pool{
    85  	New: func() interface{} {
    86  		// make a buffer with a reasonable capacity.
    87  		return bytes.NewBuffer(make([]byte, 0, maxRecordSize))
    88  	},
    89  }
    90  
    91  var bufioWriterPool = sync.Pool{
    92  	New: func() interface{} {
    93  		// io.Discard is just used to create the writer. Actual destination
    94  		// writer is set later by Reset() before using it.
    95  		return bufio.NewWriter(xioutil.Discard)
    96  	},
    97  }
    98  
    99  // UnmarshalXML - decodes XML data.
   100  func (c *CompressionType) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
   101  	var s string
   102  	if err := d.DecodeElement(&s, &start); err != nil {
   103  		return errMalformedXML(err)
   104  	}
   105  
   106  	parsedType := CompressionType(strings.ToUpper(s))
   107  	if s == "" || parsedType == "NONE" {
   108  		parsedType = noneType
   109  	}
   110  
   111  	switch parsedType {
   112  	case noneType, gzipType, bzip2Type, snappyType, s2Type, zstdType, lz4Type:
   113  	default:
   114  		return errInvalidCompressionFormat(fmt.Errorf("invalid compression format '%v'", s))
   115  	}
   116  
   117  	*c = parsedType
   118  	return nil
   119  }
   120  
   121  // InputSerialization - represents elements inside <InputSerialization/> in request XML.
   122  type InputSerialization struct {
   123  	CompressionType CompressionType    `xml:"CompressionType"`
   124  	CSVArgs         csv.ReaderArgs     `xml:"CSV"`
   125  	JSONArgs        json.ReaderArgs    `xml:"JSON"`
   126  	ParquetArgs     parquet.ReaderArgs `xml:"Parquet"`
   127  	unmarshaled     bool
   128  	format          string
   129  }
   130  
   131  // IsEmpty - returns whether input serialization is empty or not.
   132  func (input *InputSerialization) IsEmpty() bool {
   133  	return !input.unmarshaled
   134  }
   135  
   136  // UnmarshalXML - decodes XML data.
   137  func (input *InputSerialization) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
   138  	// Make subtype to avoid recursive UnmarshalXML().
   139  	type subInputSerialization InputSerialization
   140  	parsedInput := subInputSerialization{}
   141  	if err := d.DecodeElement(&parsedInput, &start); err != nil {
   142  		return errMalformedXML(err)
   143  	}
   144  
   145  	// If no compression is specified, set to noneType
   146  	if parsedInput.CompressionType == "" {
   147  		parsedInput.CompressionType = noneType
   148  	}
   149  
   150  	found := 0
   151  	if !parsedInput.CSVArgs.IsEmpty() {
   152  		parsedInput.format = csvFormat
   153  		found++
   154  	}
   155  	if !parsedInput.JSONArgs.IsEmpty() {
   156  		parsedInput.format = jsonFormat
   157  		found++
   158  	}
   159  	if !parsedInput.ParquetArgs.IsEmpty() {
   160  		if parsedInput.CompressionType != "" && parsedInput.CompressionType != noneType {
   161  			return errInvalidRequestParameter(fmt.Errorf("CompressionType must be NONE for Parquet format"))
   162  		}
   163  
   164  		parsedInput.format = parquetFormat
   165  		found++
   166  	}
   167  
   168  	if found != 1 {
   169  		return errInvalidDataSource(nil)
   170  	}
   171  
   172  	*input = InputSerialization(parsedInput)
   173  	input.unmarshaled = true
   174  	return nil
   175  }
   176  
   177  // OutputSerialization - represents elements inside <OutputSerialization/> in request XML.
   178  type OutputSerialization struct {
   179  	CSVArgs     csv.WriterArgs  `xml:"CSV"`
   180  	JSONArgs    json.WriterArgs `xml:"JSON"`
   181  	unmarshaled bool
   182  	format      string
   183  }
   184  
   185  // IsEmpty - returns whether output serialization is empty or not.
   186  func (output *OutputSerialization) IsEmpty() bool {
   187  	return !output.unmarshaled
   188  }
   189  
   190  // UnmarshalXML - decodes XML data.
   191  func (output *OutputSerialization) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
   192  	// Make subtype to avoid recursive UnmarshalXML().
   193  	type subOutputSerialization OutputSerialization
   194  	parsedOutput := subOutputSerialization{}
   195  	if err := d.DecodeElement(&parsedOutput, &start); err != nil {
   196  		return errMalformedXML(err)
   197  	}
   198  
   199  	found := 0
   200  	if !parsedOutput.CSVArgs.IsEmpty() {
   201  		parsedOutput.format = csvFormat
   202  		found++
   203  	}
   204  	if !parsedOutput.JSONArgs.IsEmpty() {
   205  		parsedOutput.format = jsonFormat
   206  		found++
   207  	}
   208  	if found != 1 {
   209  		return errObjectSerializationConflict(fmt.Errorf("either CSV or JSON should be present in OutputSerialization"))
   210  	}
   211  
   212  	*output = OutputSerialization(parsedOutput)
   213  	output.unmarshaled = true
   214  	return nil
   215  }
   216  
   217  // RequestProgress - represents elements inside <RequestProgress/> in request XML.
   218  type RequestProgress struct {
   219  	Enabled bool `xml:"Enabled"`
   220  }
   221  
   222  // ScanRange represents the ScanRange parameter.
   223  type ScanRange struct {
   224  	// Start is the byte offset to read from (from the start of the file).
   225  	Start *uint64 `xml:"Start"`
   226  	// End is the offset of the last byte that should be returned when Start
   227  	// is set, otherwise it is the offset from EOF to start reading.
   228  	End *uint64 `xml:"End"`
   229  }
   230  
   231  // Validate if the scan range is valid.
   232  func (s *ScanRange) Validate() error {
   233  	if s == nil {
   234  		return nil
   235  	}
   236  	if s.Start == nil && s.End == nil {
   237  		// This parameter is optional, but when specified, it must not be empty.
   238  		// Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html#AmazonS3-SelectObjectContent-request-ScanRange
   239  		return errors.New("ScanRange: No Start or End specified")
   240  	}
   241  	if s.Start == nil || s.End == nil {
   242  		return nil
   243  	}
   244  	if *s.Start > *s.End {
   245  		return errors.New("ScanRange: Start cannot be after end")
   246  	}
   247  	return nil
   248  }
   249  
   250  // StartLen returns start offset plus length from range.
   251  func (s *ScanRange) StartLen() (start, length int64, err error) {
   252  	if s == nil {
   253  		return 0, -1, nil
   254  	}
   255  	err = s.Validate()
   256  	if err != nil {
   257  		return 0, 0, err
   258  	}
   259  
   260  	if s.End == nil && s.Start == nil {
   261  		// Not valid, but should be caught above.
   262  		return 0, -1, nil
   263  	}
   264  	if s.End == nil {
   265  		start := int64(*s.Start)
   266  		if start < 0 {
   267  			return 0, 0, errors.New("ScanRange: Start after EOF")
   268  		}
   269  		return start, -1, nil
   270  	}
   271  	if s.Start == nil {
   272  		// Suffix length
   273  		end := int64(*s.End)
   274  		if end < 0 {
   275  			return 0, 0, errors.New("ScanRange: End bigger than file")
   276  		}
   277  		// Suffix length
   278  		return -end, -1, nil
   279  	}
   280  	start = int64(*s.Start)
   281  	end := int64(*s.End)
   282  	return start, end - start + 1, nil
   283  }
   284  
   285  // S3Select - filters the contents on a simple structured query language (SQL) statement. It
   286  // represents elements inside <SelectRequest/> in request XML specified in detail at
   287  // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html.
   288  type S3Select struct {
   289  	XMLName        xml.Name            `xml:"SelectRequest"`
   290  	Expression     string              `xml:"Expression"`
   291  	ExpressionType string              `xml:"ExpressionType"`
   292  	Input          InputSerialization  `xml:"InputSerialization"`
   293  	Output         OutputSerialization `xml:"OutputSerialization"`
   294  	Progress       RequestProgress     `xml:"RequestProgress"`
   295  	ScanRange      *ScanRange          `xml:"ScanRange"`
   296  
   297  	statement      *sql.SelectStatement
   298  	progressReader *progressReader
   299  	recordReader   recordReader
   300  }
   301  
   302  var legacyXMLName = "SelectObjectContentRequest"
   303  
   304  // UnmarshalXML - decodes XML data.
   305  func (s3Select *S3Select) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
   306  	// S3 also supports the older SelectObjectContentRequest tag,
   307  	// though it is no longer found in documentation. This is
   308  	// checked and renamed below to allow older clients to also
   309  	// work.
   310  	if start.Name.Local == legacyXMLName {
   311  		start.Name = xml.Name{Space: "", Local: "SelectRequest"}
   312  	}
   313  
   314  	// Make subtype to avoid recursive UnmarshalXML().
   315  	type subS3Select S3Select
   316  	parsedS3Select := subS3Select{}
   317  	if err := d.DecodeElement(&parsedS3Select, &start); err != nil {
   318  		if _, ok := err.(*s3Error); ok {
   319  			return err
   320  		}
   321  
   322  		return errMalformedXML(err)
   323  	}
   324  	if err := parsedS3Select.ScanRange.Validate(); err != nil {
   325  		return errInvalidScanRangeParameter(err)
   326  	}
   327  	parsedS3Select.ExpressionType = strings.ToLower(parsedS3Select.ExpressionType)
   328  	if parsedS3Select.ExpressionType != "sql" {
   329  		return errInvalidExpressionType(fmt.Errorf("invalid expression type '%v'", parsedS3Select.ExpressionType))
   330  	}
   331  
   332  	if parsedS3Select.Input.IsEmpty() {
   333  		return errMissingRequiredParameter(fmt.Errorf("InputSerialization must be provided"))
   334  	}
   335  
   336  	if parsedS3Select.Output.IsEmpty() {
   337  		return errMissingRequiredParameter(fmt.Errorf("OutputSerialization must be provided"))
   338  	}
   339  
   340  	statement, err := sql.ParseSelectStatement(parsedS3Select.Expression)
   341  	if err != nil {
   342  		return err
   343  	}
   344  
   345  	parsedS3Select.statement = &statement
   346  
   347  	*s3Select = S3Select(parsedS3Select)
   348  	return nil
   349  }
   350  
   351  func (s3Select *S3Select) outputRecord() sql.Record {
   352  	switch s3Select.Output.format {
   353  	case csvFormat:
   354  		return csv.NewRecord()
   355  	case jsonFormat:
   356  		return json.NewRecord(sql.SelectFmtJSON)
   357  	}
   358  
   359  	panic(fmt.Errorf("unknown output format '%v'", s3Select.Output.format))
   360  }
   361  
   362  func (s3Select *S3Select) getProgress() (bytesScanned, bytesProcessed int64) {
   363  	if s3Select.progressReader != nil {
   364  		return s3Select.progressReader.Stats()
   365  	}
   366  
   367  	return -1, -1
   368  }
   369  
   370  // Open - opens S3 object by using callback for SQL selection query.
   371  // Currently CSV, JSON and Apache Parquet formats are supported.
   372  func (s3Select *S3Select) Open(rsc io.ReadSeekCloser) error {
   373  	offset, length, err := s3Select.ScanRange.StartLen()
   374  	if err != nil {
   375  		return err
   376  	}
   377  	seekDirection := io.SeekStart
   378  	if offset < 0 {
   379  		seekDirection = io.SeekEnd
   380  	}
   381  	switch s3Select.Input.format {
   382  	case csvFormat:
   383  		_, err = rsc.Seek(offset, seekDirection)
   384  		if err != nil {
   385  			return err
   386  		}
   387  		var rc io.ReadCloser = rsc
   388  		if length != -1 {
   389  			rc = newLimitedReadCloser(rsc, length)
   390  		}
   391  
   392  		s3Select.progressReader, err = newProgressReader(rc, s3Select.Input.CompressionType)
   393  		if err != nil {
   394  			rsc.Close()
   395  			return err
   396  		}
   397  
   398  		s3Select.recordReader, err = csv.NewReader(s3Select.progressReader, &s3Select.Input.CSVArgs)
   399  		if err != nil {
   400  			// Close all reader resources opened so far.
   401  			s3Select.progressReader.Close()
   402  
   403  			var stErr bzip2.StructuralError
   404  			if errors.As(err, &stErr) {
   405  				return errInvalidCompression(err, s3Select.Input.CompressionType)
   406  			}
   407  			// Test these compressor errors
   408  			errs := []error{
   409  				gzip.ErrHeader, gzip.ErrChecksum,
   410  				s2.ErrCorrupt, s2.ErrUnsupported, s2.ErrCRC,
   411  				zstd.ErrBlockTooSmall, zstd.ErrMagicMismatch, zstd.ErrWindowSizeExceeded, zstd.ErrUnknownDictionary, zstd.ErrWindowSizeTooSmall,
   412  				lz4.ErrInvalid, lz4.ErrBlockDependency,
   413  			}
   414  			for _, e := range errs {
   415  				if errors.Is(err, e) {
   416  					return errInvalidCompression(err, s3Select.Input.CompressionType)
   417  				}
   418  			}
   419  			return err
   420  		}
   421  		return nil
   422  	case jsonFormat:
   423  		_, err = rsc.Seek(offset, seekDirection)
   424  		if err != nil {
   425  			return err
   426  		}
   427  		var rc io.ReadCloser = rsc
   428  		if length != -1 {
   429  			rc = newLimitedReadCloser(rsc, length)
   430  		}
   431  
   432  		s3Select.progressReader, err = newProgressReader(rc, s3Select.Input.CompressionType)
   433  		if err != nil {
   434  			rsc.Close()
   435  			return err
   436  		}
   437  
   438  		if strings.EqualFold(s3Select.Input.JSONArgs.ContentType, "lines") {
   439  			if simdjson.SupportedCPU() {
   440  				s3Select.recordReader = simdj.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
   441  			} else {
   442  				s3Select.recordReader = json.NewPReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
   443  			}
   444  		} else {
   445  			s3Select.recordReader = json.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
   446  		}
   447  
   448  		return nil
   449  	case parquetFormat:
   450  		if !parquetSupport {
   451  			return errors.New("parquet format parsing not enabled on server")
   452  		}
   453  		if offset != 0 || length != -1 {
   454  			// Offsets do not make sense in parquet files.
   455  			return errors.New("parquet format does not support offsets")
   456  		}
   457  		var err error
   458  		s3Select.recordReader, err = parquet.NewParquetReader(rsc, &s3Select.Input.ParquetArgs)
   459  		return err
   460  	}
   461  
   462  	return fmt.Errorf("unknown input format '%v'", s3Select.Input.format)
   463  }
   464  
   465  func (s3Select *S3Select) marshal(buf *bytes.Buffer, record sql.Record) error {
   466  	switch s3Select.Output.format {
   467  	case csvFormat:
   468  		// Use bufio Writer to prevent csv.Writer from allocating a new buffer.
   469  		bufioWriter := bufioWriterPool.Get().(*bufio.Writer)
   470  		defer func() {
   471  			bufioWriter.Reset(xioutil.Discard)
   472  			bufioWriterPool.Put(bufioWriter)
   473  		}()
   474  
   475  		bufioWriter.Reset(buf)
   476  		opts := sql.WriteCSVOpts{
   477  			FieldDelimiter: []rune(s3Select.Output.CSVArgs.FieldDelimiter)[0],
   478  			Quote:          []rune(s3Select.Output.CSVArgs.QuoteCharacter)[0],
   479  			QuoteEscape:    []rune(s3Select.Output.CSVArgs.QuoteEscapeCharacter)[0],
   480  			AlwaysQuote:    strings.EqualFold(s3Select.Output.CSVArgs.QuoteFields, "always"),
   481  		}
   482  		err := record.WriteCSV(bufioWriter, opts)
   483  		if err != nil {
   484  			return err
   485  		}
   486  		err = bufioWriter.Flush()
   487  		if err != nil {
   488  			return err
   489  		}
   490  		if buf.Bytes()[buf.Len()-1] == '\n' {
   491  			buf.Truncate(buf.Len() - 1)
   492  		}
   493  		buf.WriteString(s3Select.Output.CSVArgs.RecordDelimiter)
   494  
   495  		return nil
   496  	case jsonFormat:
   497  		err := record.WriteJSON(buf)
   498  		if err != nil {
   499  			return err
   500  		}
   501  		// Trim trailing newline from non-simd output
   502  		if buf.Bytes()[buf.Len()-1] == '\n' {
   503  			buf.Truncate(buf.Len() - 1)
   504  		}
   505  		buf.WriteString(s3Select.Output.JSONArgs.RecordDelimiter)
   506  
   507  		return nil
   508  	}
   509  
   510  	panic(fmt.Errorf("unknown output format '%v'", s3Select.Output.format))
   511  }
   512  
   513  // Evaluate - filters and sends records read from opened reader as per select statement to http response writer.
   514  func (s3Select *S3Select) Evaluate(w http.ResponseWriter) {
   515  	getProgressFunc := s3Select.getProgress
   516  	if !s3Select.Progress.Enabled {
   517  		getProgressFunc = nil
   518  	}
   519  	writer := newMessageWriter(w, getProgressFunc)
   520  
   521  	var outputQueue []sql.Record
   522  
   523  	// Create queue based on the type.
   524  	if s3Select.statement.IsAggregated() {
   525  		outputQueue = make([]sql.Record, 0, 1)
   526  	} else {
   527  		outputQueue = make([]sql.Record, 0, 100)
   528  	}
   529  	var err error
   530  	sendRecord := func() bool {
   531  		buf := bufPool.Get().(*bytes.Buffer)
   532  		buf.Reset()
   533  
   534  		for _, outputRecord := range outputQueue {
   535  			if outputRecord == nil {
   536  				continue
   537  			}
   538  			before := buf.Len()
   539  			if err = s3Select.marshal(buf, outputRecord); err != nil {
   540  				bufPool.Put(buf)
   541  				return false
   542  			}
   543  			if buf.Len()-before > maxRecordSize {
   544  				writer.FinishWithError("OverMaxRecordSize", "The length of a record in the input or result is greater than maxCharsPerRecord of 1 MB.")
   545  				bufPool.Put(buf)
   546  				return false
   547  			}
   548  		}
   549  
   550  		if err = writer.SendRecord(buf); err != nil {
   551  			// FIXME: log this error.
   552  			err = nil
   553  			bufPool.Put(buf)
   554  			return false
   555  		}
   556  		outputQueue = outputQueue[:0]
   557  		return true
   558  	}
   559  
   560  	var rec sql.Record
   561  OuterLoop:
   562  	for {
   563  		if s3Select.statement.LimitReached() {
   564  			if !sendRecord() {
   565  				break
   566  			}
   567  			if err = writer.Finish(s3Select.getProgress()); err != nil {
   568  				// FIXME: log this error.
   569  				err = nil
   570  			}
   571  			break
   572  		}
   573  
   574  		if rec, err = s3Select.recordReader.Read(rec); err != nil {
   575  			if err != io.EOF {
   576  				break
   577  			}
   578  
   579  			if s3Select.statement.IsAggregated() {
   580  				outputRecord := s3Select.outputRecord()
   581  				if err = s3Select.statement.AggregateResult(outputRecord); err != nil {
   582  					break
   583  				}
   584  				outputQueue = append(outputQueue, outputRecord)
   585  			}
   586  
   587  			if !sendRecord() {
   588  				break
   589  			}
   590  
   591  			if err = writer.Finish(s3Select.getProgress()); err != nil {
   592  				// FIXME: log this error.
   593  				err = nil
   594  			}
   595  			break
   596  		}
   597  
   598  		var inputRecords []*sql.Record
   599  		if inputRecords, err = s3Select.statement.EvalFrom(s3Select.Input.format, rec); err != nil {
   600  			break
   601  		}
   602  
   603  		for _, inputRecord := range inputRecords {
   604  			if s3Select.statement.IsAggregated() {
   605  				if err = s3Select.statement.AggregateRow(*inputRecord); err != nil {
   606  					break OuterLoop
   607  				}
   608  			} else {
   609  				var outputRecord sql.Record
   610  				// We will attempt to reuse the records in the table.
   611  				// The type of these should not change.
   612  				// The queue should always have at least one entry left for this to work.
   613  				outputQueue = outputQueue[:len(outputQueue)+1]
   614  				if t := outputQueue[len(outputQueue)-1]; t != nil {
   615  					// If the output record is already set, we reuse it.
   616  					outputRecord = t
   617  					outputRecord.Reset()
   618  				} else {
   619  					// Create new one
   620  					outputRecord = s3Select.outputRecord()
   621  					outputQueue[len(outputQueue)-1] = outputRecord
   622  				}
   623  				outputRecord, err = s3Select.statement.Eval(*inputRecord, outputRecord)
   624  				if outputRecord == nil || err != nil {
   625  					// This should not be written.
   626  					// Remove it from the queue.
   627  					outputQueue = outputQueue[:len(outputQueue)-1]
   628  					if err != nil {
   629  						break OuterLoop
   630  					}
   631  					continue
   632  				}
   633  
   634  				outputQueue[len(outputQueue)-1] = outputRecord
   635  				if s3Select.statement.LimitReached() {
   636  					if !sendRecord() {
   637  						break
   638  					}
   639  					if err = writer.Finish(s3Select.getProgress()); err != nil {
   640  						// FIXME: log this error.
   641  						err = nil
   642  					}
   643  					return
   644  				}
   645  
   646  				if len(outputQueue) < cap(outputQueue) {
   647  					continue
   648  				}
   649  
   650  				if !sendRecord() {
   651  					break OuterLoop
   652  				}
   653  			}
   654  		}
   655  	}
   656  
   657  	if err != nil {
   658  		_ = writer.FinishWithError("InternalError", err.Error())
   659  	}
   660  }
   661  
   662  // Close - closes opened S3 object.
   663  func (s3Select *S3Select) Close() error {
   664  	if s3Select.recordReader == nil {
   665  		return nil
   666  	}
   667  	return s3Select.recordReader.Close()
   668  }
   669  
   670  // NewS3Select - creates new S3Select by given request XML reader.
   671  func NewS3Select(r io.Reader) (*S3Select, error) {
   672  	s3Select := &S3Select{}
   673  	if err := xml.NewDecoder(r).Decode(s3Select); err != nil {
   674  		return nil, err
   675  	}
   676  
   677  	return s3Select, nil
   678  }
   679  
   680  //////////////////
   681  // Helpers
   682  /////////////////
   683  
   684  // limitedReadCloser is like io.LimitedReader, but also implements io.Closer.
   685  type limitedReadCloser struct {
   686  	io.LimitedReader
   687  	io.Closer
   688  }
   689  
   690  func newLimitedReadCloser(r io.ReadCloser, n int64) *limitedReadCloser {
   691  	return &limitedReadCloser{
   692  		LimitedReader: io.LimitedReader{R: r, N: n},
   693  		Closer:        r,
   694  	}
   695  }
   696  
   697  // ObjectSegmentReaderFn is a function that returns a reader for a contiguous
   698  // suffix segment of an object starting at the given (non-negative) offset.
   699  type ObjectSegmentReaderFn func(offset int64) (io.ReadCloser, error)
   700  
   701  // ObjectReadSeekCloser implements ReadSeekCloser interface for reading objects.
   702  // It uses a function that returns a io.ReadCloser for the object.
   703  type ObjectReadSeekCloser struct {
   704  	segmentReader ObjectSegmentReaderFn
   705  
   706  	size   int64 // actual object size regardless of compression/encryption
   707  	offset int64
   708  	reader io.ReadCloser
   709  
   710  	// reader can be closed idempotently multiple times
   711  	closerOnce sync.Once
   712  	// Error storing reader.Close()
   713  	closerErr error
   714  }
   715  
   716  // NewObjectReadSeekCloser creates a new ObjectReadSeekCloser.
   717  func NewObjectReadSeekCloser(segmentReader ObjectSegmentReaderFn, actualSize int64) *ObjectReadSeekCloser {
   718  	return &ObjectReadSeekCloser{
   719  		segmentReader: segmentReader,
   720  		size:          actualSize,
   721  		offset:        0,
   722  		reader:        nil,
   723  	}
   724  }
   725  
   726  // Seek call to implement io.Seeker
   727  func (rsc *ObjectReadSeekCloser) Seek(offset int64, whence int) (int64, error) {
   728  	// fmt.Printf("actual: %v offset: %v (%v) whence: %v\n", rsc.size, offset, rsc.offset, whence)
   729  	switch whence {
   730  	case io.SeekStart:
   731  		rsc.offset = offset
   732  	case io.SeekCurrent:
   733  		rsc.offset += offset
   734  	case io.SeekEnd:
   735  		rsc.offset = rsc.size + offset
   736  	}
   737  	if rsc.offset < 0 {
   738  		return rsc.offset, errors.New("seek to invalid negative offset")
   739  	}
   740  	if rsc.offset >= rsc.size {
   741  		return rsc.offset, errors.New("seek past end of object")
   742  	}
   743  	if rsc.reader != nil {
   744  		_ = rsc.reader.Close()
   745  		rsc.reader = nil
   746  	}
   747  	return rsc.offset, nil
   748  }
   749  
   750  // Read call to implement io.Reader
   751  func (rsc *ObjectReadSeekCloser) Read(p []byte) (n int, err error) {
   752  	if rsc.reader == nil {
   753  		rsc.reader, err = rsc.segmentReader(rsc.offset)
   754  		if err != nil {
   755  			return 0, err
   756  		}
   757  	}
   758  	return rsc.reader.Read(p)
   759  }
   760  
   761  // Close call to implement io.Closer. Calling Read/Seek after Close reopens the
   762  // object for reading and a subsequent Close call is required to ensure
   763  // resources are freed.
   764  func (rsc *ObjectReadSeekCloser) Close() error {
   765  	rsc.closerOnce.Do(func() {
   766  		if rsc.reader != nil {
   767  			rsc.closerErr = rsc.reader.Close()
   768  			rsc.reader = nil
   769  		}
   770  	})
   771  	return rsc.closerErr
   772  }