storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/select_benchmark_test.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2019 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package s3select
    18  
import (
	"bytes"
	"encoding/csv"
	"io"
	"io/ioutil"
	"math/rand"
	"net/http"
	"strconv"
	"sync"
	"testing"
	"time"

	humanize "github.com/dustin/go-humanize"
)
    32  
    33  const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    34  
    35  func newRandString(length int) string {
    36  	randSrc := rand.New(rand.NewSource(time.Now().UnixNano()))
    37  
    38  	b := make([]byte, length)
    39  	for i := range b {
    40  		b[i] = charset[randSrc.Intn(len(charset))]
    41  	}
    42  	return string(b)
    43  }
    44  
    45  func genSampleCSVData(count int) []byte {
    46  	buf := &bytes.Buffer{}
    47  	csvWriter := csv.NewWriter(buf)
    48  	csvWriter.Write([]string{"id", "name", "age", "city"})
    49  
    50  	for i := 0; i < count; i++ {
    51  		csvWriter.Write([]string{
    52  			strconv.Itoa(i),
    53  			newRandString(10),
    54  			newRandString(5),
    55  			newRandString(10),
    56  		})
    57  	}
    58  
    59  	csvWriter.Flush()
    60  	return buf.Bytes()
    61  }
    62  
    63  type nullResponseWriter struct {
    64  }
    65  
    66  func (w *nullResponseWriter) Header() http.Header {
    67  	return nil
    68  }
    69  
    70  func (w *nullResponseWriter) Write(p []byte) (int, error) {
    71  	return len(p), nil
    72  }
    73  
    74  func (w *nullResponseWriter) WriteHeader(statusCode int) {
    75  }
    76  
    77  func (w *nullResponseWriter) Flush() {
    78  }
    79  
    80  func benchmarkSelect(b *testing.B, count int, query string) {
    81  	var requestXML = []byte(`
    82  <?xml version="1.0" encoding="UTF-8"?>
    83  <SelectObjectContentRequest>
    84      <Expression>` + query + `</Expression>
    85      <ExpressionType>SQL</ExpressionType>
    86      <InputSerialization>
    87          <CompressionType>NONE</CompressionType>
    88          <CSV>
    89              <FileHeaderInfo>USE</FileHeaderInfo>
    90          </CSV>
    91      </InputSerialization>
    92      <OutputSerialization>
    93          <CSV>
    94          </CSV>
    95      </OutputSerialization>
    96      <RequestProgress>
    97          <Enabled>FALSE</Enabled>
    98      </RequestProgress>
    99  </SelectObjectContentRequest>
   100  `)
   101  
   102  	csvData := genSampleCSVData(count)
   103  
   104  	b.ResetTimer()
   105  	b.ReportAllocs()
   106  	b.SetBytes(int64(count))
   107  
   108  	b.RunParallel(func(pb *testing.PB) {
   109  		for pb.Next() {
   110  			s3Select, err := NewS3Select(bytes.NewReader(requestXML))
   111  			if err != nil {
   112  				b.Fatal(err)
   113  			}
   114  
   115  			if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
   116  				return ioutil.NopCloser(bytes.NewReader(csvData)), nil
   117  			}); err != nil {
   118  				b.Fatal(err)
   119  			}
   120  
   121  			s3Select.Evaluate(&nullResponseWriter{})
   122  			s3Select.Close()
   123  		}
   124  	})
   125  }
   126  
   127  func benchmarkSelectAll(b *testing.B, count int) {
   128  	benchmarkSelect(b, count, "select * from S3Object")
   129  }
   130  
   131  // BenchmarkSelectAll_100K - benchmark * function with 100k records.
   132  func BenchmarkSelectAll_100K(b *testing.B) {
   133  	benchmarkSelectAll(b, 100*humanize.KiByte)
   134  }
   135  
   136  // BenchmarkSelectAll_1M - benchmark * function with 1m records.
   137  func BenchmarkSelectAll_1M(b *testing.B) {
   138  	benchmarkSelectAll(b, 1*humanize.MiByte)
   139  }
   140  
   141  // BenchmarkSelectAll_2M - benchmark * function with 2m records.
   142  func BenchmarkSelectAll_2M(b *testing.B) {
   143  	benchmarkSelectAll(b, 2*humanize.MiByte)
   144  }
   145  
   146  // BenchmarkSelectAll_10M - benchmark * function with 10m records.
   147  func BenchmarkSelectAll_10M(b *testing.B) {
   148  	benchmarkSelectAll(b, 10*humanize.MiByte)
   149  }
   150  
   151  func benchmarkSingleCol(b *testing.B, count int) {
   152  	benchmarkSelect(b, count, "select id from S3Object")
   153  }
   154  
   155  // BenchmarkSingleRow_100K - benchmark SELECT column function with 100k records.
   156  func BenchmarkSingleCol_100K(b *testing.B) {
   157  	benchmarkSingleCol(b, 1e5)
   158  }
   159  
   160  // BenchmarkSelectAll_1M - benchmark * function with 1m records.
   161  func BenchmarkSingleCol_1M(b *testing.B) {
   162  	benchmarkSingleCol(b, 1e6)
   163  }
   164  
   165  // BenchmarkSelectAll_2M - benchmark * function with 2m records.
   166  func BenchmarkSingleCol_2M(b *testing.B) {
   167  	benchmarkSingleCol(b, 2e6)
   168  }
   169  
   170  // BenchmarkSelectAll_10M - benchmark * function with 10m records.
   171  func BenchmarkSingleCol_10M(b *testing.B) {
   172  	benchmarkSingleCol(b, 1e7)
   173  }
   174  
   175  func benchmarkAggregateCount(b *testing.B, count int) {
   176  	benchmarkSelect(b, count, "select count(*) from S3Object")
   177  }
   178  
   179  // BenchmarkAggregateCount_100K - benchmark count(*) function with 100k records.
   180  func BenchmarkAggregateCount_100K(b *testing.B) {
   181  	benchmarkAggregateCount(b, 100*humanize.KiByte)
   182  }
   183  
   184  // BenchmarkAggregateCount_1M - benchmark count(*) function with 1m records.
   185  func BenchmarkAggregateCount_1M(b *testing.B) {
   186  	benchmarkAggregateCount(b, 1*humanize.MiByte)
   187  }
   188  
   189  // BenchmarkAggregateCount_2M - benchmark count(*) function with 2m records.
   190  func BenchmarkAggregateCount_2M(b *testing.B) {
   191  	benchmarkAggregateCount(b, 2*humanize.MiByte)
   192  }
   193  
   194  // BenchmarkAggregateCount_10M - benchmark count(*) function with 10m records.
   195  func BenchmarkAggregateCount_10M(b *testing.B) {
   196  	benchmarkAggregateCount(b, 10*humanize.MiByte)
   197  }