github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/performance/benchmarks/dataset.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package main 16 17 import ( 18 "io" 19 "log" 20 "strings" 21 ) 22 23 // Dataset is a set of test data used for benchmark testing 24 type Dataset interface { 25 // GenerateData generates a dataset for testing 26 GenerateData() 27 28 // Change returns a Dataset mutated by the given percentage of change 29 Change(pct float32) Dataset 30 } 31 32 // DSImpl implements the Dataset interface 33 type DSImpl struct { 34 // Schema defines the structure of the Dataset 35 Schema *SeedSchema 36 37 // TableName is the name of the test dataset 38 TableName string 39 40 // w is the writer where the test dataset will be written 41 w io.Writer 42 43 // sf is the function used to generate random data values in the dataset 44 sf seedFunc 45 } 46 47 // NewDSImpl creates a new DSImpl 48 func NewDSImpl(wc io.Writer, sch *SeedSchema, sf seedFunc, tableName string) *DSImpl { 49 return &DSImpl{Schema: sch, TableName: tableName, sf: sf, w: wc} 50 } 51 52 // GenerateData generates a dataset and writes it to a io.Writer 53 func (ds *DSImpl) GenerateData() { 54 writeDataToWriter(ds.w, ds.Schema.Rows, ds.Schema.Columns, ds.sf, ds.TableName, ds.Schema.FileFormatExt) 55 } 56 57 // Change returns a DataSet that is a mutation of this Dataset by the given percentage 58 func (ds *DSImpl) Change(pct float32) Dataset { 59 // TODO 60 return &DSImpl{} 61 } 62 63 func writeDataToWriter(wc io.Writer, rows int, cols []*SeedColumn, sf seedFunc, tableName, format string) { 64 // handle the "header" for all format types 65 writeHeader(wc, cols, tableName, format) 66 67 var prevRow []string 68 for i := 0; i < rows; i++ { 69 row := make([]string, len(cols)) 70 71 for colIndex, col := range cols { 72 val := getColValue(prevRow, colIndex, col, sf, format) 73 row[colIndex] = val 74 75 if i > 0 && prevRow != nil { 76 prevRow[colIndex] = val 77 } 78 } 79 _, err := wc.Write([]byte(formatRow(row, cols, i, rows-1, tableName, format))) 80 if err != nil { 81 log.Fatal(err) 82 } 83 prevRow = row[:] 84 } 85 86 // handle the "footer" for format types 87 switch format { 88 case jsonExt: 89 suffix := "]}\n" 90 _, err := wc.Write([]byte(suffix)) 91 if err != nil { 92 log.Fatal(err) 93 } 94 default: 95 } 96 } 97 98 func writeHeader(w io.Writer, cols []*SeedColumn, tableName, format string) { 99 switch format { 100 case csvExt: 101 header := makeCSVHeaderStr(cols, tableName, format) 102 _, err := w.Write([]byte(header + "\n")) 103 if err != nil { 104 log.Fatal(err) 105 } 106 case sqlExt: 107 header := getSQLHeader(cols, tableName, format) 108 _, err := w.Write([]byte(header + "\n")) 109 if err != nil { 110 log.Fatal(err) 111 } 112 case jsonExt: 113 prefix := "{\"Rows\":[" 114 _, err := w.Write([]byte(prefix)) 115 if err != nil { 116 log.Fatal(err) 117 } 118 default: 119 log.Fatalf("unable to write the header, unsupported format %v \n", format) 120 } 121 } 122 123 func formatRow(strs []string, cols []*SeedColumn, currentRowIdx, lastRowIdx int, tableName, format string) string { 124 switch format { 125 case csvExt: 126 return strings.Join(strs, ",") + "\n" 127 case sqlExt: 128 return getSQLRow(strs, cols, tableName) + "\n" 129 case jsonExt: 130 var suffix string 131 if currentRowIdx == lastRowIdx { 132 suffix = "\n" 133 } else { 134 suffix = ",\n" 135 } 136 return getJSONRow(strs, cols) + suffix 137 default: 138 log.Fatalf("cannot format row, unsupported file format %s \n", format) 139 } 140 return "" 141 } 142 143 func makeCSVHeaderStr(cols []*SeedColumn, tableName, format string) string { 144 str := make([]string, 0, len(cols)) 145 for _, col := range cols { 146 str = append(str, col.Name) 147 } 148 return formatRow(str, cols, 0, 1, tableName, format) 149 }