github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/select_benchmark_test.go (about) 1 // Copyright (c) 2015-2021 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package s3select 19 20 import ( 21 "bytes" 22 "encoding/csv" 23 "math/rand" 24 "net/http" 25 "strconv" 26 "testing" 27 "time" 28 29 humanize "github.com/dustin/go-humanize" 30 ) 31 32 const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" 33 34 func newRandString(length int) string { 35 randSrc := rand.New(rand.NewSource(time.Now().UnixNano())) 36 37 b := make([]byte, length) 38 for i := range b { 39 b[i] = charset[randSrc.Intn(len(charset))] 40 } 41 return string(b) 42 } 43 44 func genSampleCSVData(count int) []byte { 45 buf := &bytes.Buffer{} 46 csvWriter := csv.NewWriter(buf) 47 csvWriter.Write([]string{"id", "name", "age", "city"}) 48 49 for i := 0; i < count; i++ { 50 csvWriter.Write([]string{ 51 strconv.Itoa(i), 52 newRandString(10), 53 newRandString(5), 54 newRandString(10), 55 }) 56 } 57 58 csvWriter.Flush() 59 return buf.Bytes() 60 } 61 62 type nullResponseWriter struct{} 63 64 func (w *nullResponseWriter) Header() http.Header { 65 return nil 66 } 67 68 func (w *nullResponseWriter) Write(p []byte) (int, error) { 69 return len(p), nil 70 } 71 72 func (w *nullResponseWriter) WriteHeader(statusCode int) { 73 } 74 75 func (w *nullResponseWriter) Flush() { 76 } 77 78 func benchmarkSelect(b *testing.B, count int, query string) { 79 requestXML := []byte(` 80 <?xml version="1.0" encoding="UTF-8"?> 81 <SelectObjectContentRequest> 82 <Expression>` + query + `</Expression> 83 <ExpressionType>SQL</ExpressionType> 84 <InputSerialization> 85 <CompressionType>NONE</CompressionType> 86 <CSV> 87 <FileHeaderInfo>USE</FileHeaderInfo> 88 </CSV> 89 </InputSerialization> 90 <OutputSerialization> 91 <CSV> 92 </CSV> 93 </OutputSerialization> 94 <RequestProgress> 95 <Enabled>FALSE</Enabled> 96 </RequestProgress> 97 </SelectObjectContentRequest> 98 `) 99 100 csvData := genSampleCSVData(count) 101 102 b.ResetTimer() 103 b.ReportAllocs() 104 b.SetBytes(int64(count)) 105 106 b.RunParallel(func(pb *testing.PB) { 107 for pb.Next() { 108 s3Select, err := NewS3Select(bytes.NewReader(requestXML)) 109 if err != nil { 110 b.Fatal(err) 111 } 112 113 if err = s3Select.Open(newBytesRSC(csvData)); err != nil { 114 b.Fatal(err) 115 } 116 117 s3Select.Evaluate(&nullResponseWriter{}) 118 s3Select.Close() 119 } 120 }) 121 } 122 123 func benchmarkSelectAll(b *testing.B, count int) { 124 benchmarkSelect(b, count, "select * from S3Object") 125 } 126 127 // BenchmarkSelectAll_100K - benchmark * function with 100k records. 128 func BenchmarkSelectAll_100K(b *testing.B) { 129 benchmarkSelectAll(b, 100*humanize.KiByte) 130 } 131 132 // BenchmarkSelectAll_1M - benchmark * function with 1m records. 133 func BenchmarkSelectAll_1M(b *testing.B) { 134 benchmarkSelectAll(b, 1*humanize.MiByte) 135 } 136 137 // BenchmarkSelectAll_2M - benchmark * function with 2m records. 138 func BenchmarkSelectAll_2M(b *testing.B) { 139 benchmarkSelectAll(b, 2*humanize.MiByte) 140 } 141 142 // BenchmarkSelectAll_10M - benchmark * function with 10m records. 143 func BenchmarkSelectAll_10M(b *testing.B) { 144 benchmarkSelectAll(b, 10*humanize.MiByte) 145 } 146 147 func benchmarkSingleCol(b *testing.B, count int) { 148 benchmarkSelect(b, count, "select id from S3Object") 149 } 150 151 // BenchmarkSingleRow_100K - benchmark SELECT column function with 100k records. 152 func BenchmarkSingleCol_100K(b *testing.B) { 153 benchmarkSingleCol(b, 1e5) 154 } 155 156 // BenchmarkSelectAll_1M - benchmark * function with 1m records. 157 func BenchmarkSingleCol_1M(b *testing.B) { 158 benchmarkSingleCol(b, 1e6) 159 } 160 161 // BenchmarkSelectAll_2M - benchmark * function with 2m records. 162 func BenchmarkSingleCol_2M(b *testing.B) { 163 benchmarkSingleCol(b, 2e6) 164 } 165 166 // BenchmarkSelectAll_10M - benchmark * function with 10m records. 167 func BenchmarkSingleCol_10M(b *testing.B) { 168 benchmarkSingleCol(b, 1e7) 169 } 170 171 func benchmarkAggregateCount(b *testing.B, count int) { 172 benchmarkSelect(b, count, "select count(*) from S3Object") 173 } 174 175 // BenchmarkAggregateCount_100K - benchmark count(*) function with 100k records. 176 func BenchmarkAggregateCount_100K(b *testing.B) { 177 benchmarkAggregateCount(b, 100*humanize.KiByte) 178 } 179 180 // BenchmarkAggregateCount_1M - benchmark count(*) function with 1m records. 181 func BenchmarkAggregateCount_1M(b *testing.B) { 182 benchmarkAggregateCount(b, 1*humanize.MiByte) 183 } 184 185 // BenchmarkAggregateCount_2M - benchmark count(*) function with 2m records. 186 func BenchmarkAggregateCount_2M(b *testing.B) { 187 benchmarkAggregateCount(b, 2*humanize.MiByte) 188 } 189 190 // BenchmarkAggregateCount_10M - benchmark count(*) function with 10m records. 191 func BenchmarkAggregateCount_10M(b *testing.B) { 192 benchmarkAggregateCount(b, 10*humanize.MiByte) 193 }