storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/select_benchmark_test.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2019 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package s3select 18 19 import ( 20 "bytes" 21 "encoding/csv" 22 "io" 23 "io/ioutil" 24 "math/rand" 25 "net/http" 26 "strconv" 27 "testing" 28 "time" 29 30 humanize "github.com/dustin/go-humanize" 31 ) 32 33 const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" 34 35 func newRandString(length int) string { 36 randSrc := rand.New(rand.NewSource(time.Now().UnixNano())) 37 38 b := make([]byte, length) 39 for i := range b { 40 b[i] = charset[randSrc.Intn(len(charset))] 41 } 42 return string(b) 43 } 44 45 func genSampleCSVData(count int) []byte { 46 buf := &bytes.Buffer{} 47 csvWriter := csv.NewWriter(buf) 48 csvWriter.Write([]string{"id", "name", "age", "city"}) 49 50 for i := 0; i < count; i++ { 51 csvWriter.Write([]string{ 52 strconv.Itoa(i), 53 newRandString(10), 54 newRandString(5), 55 newRandString(10), 56 }) 57 } 58 59 csvWriter.Flush() 60 return buf.Bytes() 61 } 62 63 type nullResponseWriter struct { 64 } 65 66 func (w *nullResponseWriter) Header() http.Header { 67 return nil 68 } 69 70 func (w *nullResponseWriter) Write(p []byte) (int, error) { 71 return len(p), nil 72 } 73 74 func (w *nullResponseWriter) WriteHeader(statusCode int) { 75 } 76 77 func (w *nullResponseWriter) Flush() { 78 } 79 80 func benchmarkSelect(b *testing.B, count int, query string) { 81 var requestXML = []byte(` 82 <?xml version="1.0" encoding="UTF-8"?> 83 <SelectObjectContentRequest> 84 <Expression>` + query + `</Expression> 85 <ExpressionType>SQL</ExpressionType> 86 <InputSerialization> 87 <CompressionType>NONE</CompressionType> 88 <CSV> 89 <FileHeaderInfo>USE</FileHeaderInfo> 90 </CSV> 91 </InputSerialization> 92 <OutputSerialization> 93 <CSV> 94 </CSV> 95 </OutputSerialization> 96 <RequestProgress> 97 <Enabled>FALSE</Enabled> 98 </RequestProgress> 99 </SelectObjectContentRequest> 100 `) 101 102 csvData := genSampleCSVData(count) 103 104 b.ResetTimer() 105 b.ReportAllocs() 106 b.SetBytes(int64(count)) 107 108 b.RunParallel(func(pb *testing.PB) { 109 for pb.Next() { 110 s3Select, err := NewS3Select(bytes.NewReader(requestXML)) 111 if err != nil { 112 b.Fatal(err) 113 } 114 115 if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) { 116 return ioutil.NopCloser(bytes.NewReader(csvData)), nil 117 }); err != nil { 118 b.Fatal(err) 119 } 120 121 s3Select.Evaluate(&nullResponseWriter{}) 122 s3Select.Close() 123 } 124 }) 125 } 126 127 func benchmarkSelectAll(b *testing.B, count int) { 128 benchmarkSelect(b, count, "select * from S3Object") 129 } 130 131 // BenchmarkSelectAll_100K - benchmark * function with 100k records. 132 func BenchmarkSelectAll_100K(b *testing.B) { 133 benchmarkSelectAll(b, 100*humanize.KiByte) 134 } 135 136 // BenchmarkSelectAll_1M - benchmark * function with 1m records. 137 func BenchmarkSelectAll_1M(b *testing.B) { 138 benchmarkSelectAll(b, 1*humanize.MiByte) 139 } 140 141 // BenchmarkSelectAll_2M - benchmark * function with 2m records. 142 func BenchmarkSelectAll_2M(b *testing.B) { 143 benchmarkSelectAll(b, 2*humanize.MiByte) 144 } 145 146 // BenchmarkSelectAll_10M - benchmark * function with 10m records. 147 func BenchmarkSelectAll_10M(b *testing.B) { 148 benchmarkSelectAll(b, 10*humanize.MiByte) 149 } 150 151 func benchmarkSingleCol(b *testing.B, count int) { 152 benchmarkSelect(b, count, "select id from S3Object") 153 } 154 155 // BenchmarkSingleRow_100K - benchmark SELECT column function with 100k records. 156 func BenchmarkSingleCol_100K(b *testing.B) { 157 benchmarkSingleCol(b, 1e5) 158 } 159 160 // BenchmarkSelectAll_1M - benchmark * function with 1m records. 161 func BenchmarkSingleCol_1M(b *testing.B) { 162 benchmarkSingleCol(b, 1e6) 163 } 164 165 // BenchmarkSelectAll_2M - benchmark * function with 2m records. 166 func BenchmarkSingleCol_2M(b *testing.B) { 167 benchmarkSingleCol(b, 2e6) 168 } 169 170 // BenchmarkSelectAll_10M - benchmark * function with 10m records. 171 func BenchmarkSingleCol_10M(b *testing.B) { 172 benchmarkSingleCol(b, 1e7) 173 } 174 175 func benchmarkAggregateCount(b *testing.B, count int) { 176 benchmarkSelect(b, count, "select count(*) from S3Object") 177 } 178 179 // BenchmarkAggregateCount_100K - benchmark count(*) function with 100k records. 180 func BenchmarkAggregateCount_100K(b *testing.B) { 181 benchmarkAggregateCount(b, 100*humanize.KiByte) 182 } 183 184 // BenchmarkAggregateCount_1M - benchmark count(*) function with 1m records. 185 func BenchmarkAggregateCount_1M(b *testing.B) { 186 benchmarkAggregateCount(b, 1*humanize.MiByte) 187 } 188 189 // BenchmarkAggregateCount_2M - benchmark count(*) function with 2m records. 190 func BenchmarkAggregateCount_2M(b *testing.B) { 191 benchmarkAggregateCount(b, 2*humanize.MiByte) 192 } 193 194 // BenchmarkAggregateCount_10M - benchmark count(*) function with 10m records. 195 func BenchmarkAggregateCount_10M(b *testing.B) { 196 benchmarkAggregateCount(b, 10*humanize.MiByte) 197 }