github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/select_benchmark_test.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package s3select
    19  
    20  import (
    21  	"bytes"
    22  	"encoding/csv"
    23  	"math/rand"
    24  	"net/http"
    25  	"strconv"
    26  	"testing"
    27  	"time"
    28  
    29  	humanize "github.com/dustin/go-humanize"
    30  )
    31  
    32  const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    33  
    34  func newRandString(length int) string {
    35  	randSrc := rand.New(rand.NewSource(time.Now().UnixNano()))
    36  
    37  	b := make([]byte, length)
    38  	for i := range b {
    39  		b[i] = charset[randSrc.Intn(len(charset))]
    40  	}
    41  	return string(b)
    42  }
    43  
    44  func genSampleCSVData(count int) []byte {
    45  	buf := &bytes.Buffer{}
    46  	csvWriter := csv.NewWriter(buf)
    47  	csvWriter.Write([]string{"id", "name", "age", "city"})
    48  
    49  	for i := 0; i < count; i++ {
    50  		csvWriter.Write([]string{
    51  			strconv.Itoa(i),
    52  			newRandString(10),
    53  			newRandString(5),
    54  			newRandString(10),
    55  		})
    56  	}
    57  
    58  	csvWriter.Flush()
    59  	return buf.Bytes()
    60  }
    61  
    62  type nullResponseWriter struct{}
    63  
    64  func (w *nullResponseWriter) Header() http.Header {
    65  	return nil
    66  }
    67  
    68  func (w *nullResponseWriter) Write(p []byte) (int, error) {
    69  	return len(p), nil
    70  }
    71  
    72  func (w *nullResponseWriter) WriteHeader(statusCode int) {
    73  }
    74  
    75  func (w *nullResponseWriter) Flush() {
    76  }
    77  
    78  func benchmarkSelect(b *testing.B, count int, query string) {
    79  	requestXML := []byte(`
    80  <?xml version="1.0" encoding="UTF-8"?>
    81  <SelectObjectContentRequest>
    82      <Expression>` + query + `</Expression>
    83      <ExpressionType>SQL</ExpressionType>
    84      <InputSerialization>
    85          <CompressionType>NONE</CompressionType>
    86          <CSV>
    87              <FileHeaderInfo>USE</FileHeaderInfo>
    88          </CSV>
    89      </InputSerialization>
    90      <OutputSerialization>
    91          <CSV>
    92          </CSV>
    93      </OutputSerialization>
    94      <RequestProgress>
    95          <Enabled>FALSE</Enabled>
    96      </RequestProgress>
    97  </SelectObjectContentRequest>
    98  `)
    99  
   100  	csvData := genSampleCSVData(count)
   101  
   102  	b.ResetTimer()
   103  	b.ReportAllocs()
   104  	b.SetBytes(int64(count))
   105  
   106  	b.RunParallel(func(pb *testing.PB) {
   107  		for pb.Next() {
   108  			s3Select, err := NewS3Select(bytes.NewReader(requestXML))
   109  			if err != nil {
   110  				b.Fatal(err)
   111  			}
   112  
   113  			if err = s3Select.Open(newBytesRSC(csvData)); err != nil {
   114  				b.Fatal(err)
   115  			}
   116  
   117  			s3Select.Evaluate(&nullResponseWriter{})
   118  			s3Select.Close()
   119  		}
   120  	})
   121  }
   122  
   123  func benchmarkSelectAll(b *testing.B, count int) {
   124  	benchmarkSelect(b, count, "select * from S3Object")
   125  }
   126  
   127  // BenchmarkSelectAll_100K - benchmark * function with 100k records.
   128  func BenchmarkSelectAll_100K(b *testing.B) {
   129  	benchmarkSelectAll(b, 100*humanize.KiByte)
   130  }
   131  
   132  // BenchmarkSelectAll_1M - benchmark * function with 1m records.
   133  func BenchmarkSelectAll_1M(b *testing.B) {
   134  	benchmarkSelectAll(b, 1*humanize.MiByte)
   135  }
   136  
   137  // BenchmarkSelectAll_2M - benchmark * function with 2m records.
   138  func BenchmarkSelectAll_2M(b *testing.B) {
   139  	benchmarkSelectAll(b, 2*humanize.MiByte)
   140  }
   141  
   142  // BenchmarkSelectAll_10M - benchmark * function with 10m records.
   143  func BenchmarkSelectAll_10M(b *testing.B) {
   144  	benchmarkSelectAll(b, 10*humanize.MiByte)
   145  }
   146  
   147  func benchmarkSingleCol(b *testing.B, count int) {
   148  	benchmarkSelect(b, count, "select id from S3Object")
   149  }
   150  
   151  // BenchmarkSingleRow_100K - benchmark SELECT column function with 100k records.
   152  func BenchmarkSingleCol_100K(b *testing.B) {
   153  	benchmarkSingleCol(b, 1e5)
   154  }
   155  
   156  // BenchmarkSelectAll_1M - benchmark * function with 1m records.
   157  func BenchmarkSingleCol_1M(b *testing.B) {
   158  	benchmarkSingleCol(b, 1e6)
   159  }
   160  
   161  // BenchmarkSelectAll_2M - benchmark * function with 2m records.
   162  func BenchmarkSingleCol_2M(b *testing.B) {
   163  	benchmarkSingleCol(b, 2e6)
   164  }
   165  
   166  // BenchmarkSelectAll_10M - benchmark * function with 10m records.
   167  func BenchmarkSingleCol_10M(b *testing.B) {
   168  	benchmarkSingleCol(b, 1e7)
   169  }
   170  
   171  func benchmarkAggregateCount(b *testing.B, count int) {
   172  	benchmarkSelect(b, count, "select count(*) from S3Object")
   173  }
   174  
   175  // BenchmarkAggregateCount_100K - benchmark count(*) function with 100k records.
   176  func BenchmarkAggregateCount_100K(b *testing.B) {
   177  	benchmarkAggregateCount(b, 100*humanize.KiByte)
   178  }
   179  
   180  // BenchmarkAggregateCount_1M - benchmark count(*) function with 1m records.
   181  func BenchmarkAggregateCount_1M(b *testing.B) {
   182  	benchmarkAggregateCount(b, 1*humanize.MiByte)
   183  }
   184  
   185  // BenchmarkAggregateCount_2M - benchmark count(*) function with 2m records.
   186  func BenchmarkAggregateCount_2M(b *testing.B) {
   187  	benchmarkAggregateCount(b, 2*humanize.MiByte)
   188  }
   189  
   190  // BenchmarkAggregateCount_10M - benchmark count(*) function with 10m records.
   191  func BenchmarkAggregateCount_10M(b *testing.B) {
   192  	benchmarkAggregateCount(b, 10*humanize.MiByte)
   193  }