github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/performance/benchmarks/dataset.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package main
    16  
    17  import (
    18  	"io"
    19  	"log"
    20  	"strings"
    21  )
    22  
    23  // Dataset is a set of test data used for benchmark testing
    24  type Dataset interface {
    25  	// GenerateData generates a dataset for testing
    26  	GenerateData()
    27  
    28  	// Change returns a Dataset mutated by the given percentage of change
    29  	Change(pct float32) Dataset
    30  }
    31  
    32  // DSImpl implements the Dataset interface
    33  type DSImpl struct {
    34  	// Schema defines the structure of the Dataset
    35  	Schema *SeedSchema
    36  
    37  	// TableName is the name of the test dataset
    38  	TableName string
    39  
    40  	// w is the writer where the test dataset will be written
    41  	w io.Writer
    42  
    43  	// sf is the function used to generate random data values in the dataset
    44  	sf seedFunc
    45  }
    46  
    47  // NewDSImpl creates a new DSImpl
    48  func NewDSImpl(wc io.Writer, sch *SeedSchema, sf seedFunc, tableName string) *DSImpl {
    49  	return &DSImpl{Schema: sch, TableName: tableName, sf: sf, w: wc}
    50  }
    51  
    52  // GenerateData generates a dataset and writes it to a io.Writer
    53  func (ds *DSImpl) GenerateData() {
    54  	writeDataToWriter(ds.w, ds.Schema.Rows, ds.Schema.Columns, ds.sf, ds.TableName, ds.Schema.FileFormatExt)
    55  }
    56  
    57  // Change returns a DataSet that is a mutation of this Dataset by the given percentage
    58  func (ds *DSImpl) Change(pct float32) Dataset {
    59  	// TODO
    60  	return &DSImpl{}
    61  }
    62  
    63  func writeDataToWriter(wc io.Writer, rows int, cols []*SeedColumn, sf seedFunc, tableName, format string) {
    64  	// handle the "header" for all format types
    65  	writeHeader(wc, cols, tableName, format)
    66  
    67  	var prevRow []string
    68  	for i := 0; i < rows; i++ {
    69  		row := make([]string, len(cols))
    70  
    71  		for colIndex, col := range cols {
    72  			val := getColValue(prevRow, colIndex, col, sf, format)
    73  			row[colIndex] = val
    74  
    75  			if i > 0 && prevRow != nil {
    76  				prevRow[colIndex] = val
    77  			}
    78  		}
    79  		_, err := wc.Write([]byte(formatRow(row, cols, i, rows-1, tableName, format)))
    80  		if err != nil {
    81  			log.Fatal(err)
    82  		}
    83  		prevRow = row[:]
    84  	}
    85  
    86  	// handle the "footer" for format types
    87  	switch format {
    88  	case jsonExt:
    89  		suffix := "]}\n"
    90  		_, err := wc.Write([]byte(suffix))
    91  		if err != nil {
    92  			log.Fatal(err)
    93  		}
    94  	default:
    95  	}
    96  }
    97  
    98  func writeHeader(w io.Writer, cols []*SeedColumn, tableName, format string) {
    99  	switch format {
   100  	case csvExt:
   101  		header := makeCSVHeaderStr(cols, tableName, format)
   102  		_, err := w.Write([]byte(header + "\n"))
   103  		if err != nil {
   104  			log.Fatal(err)
   105  		}
   106  	case sqlExt:
   107  		header := getSQLHeader(cols, tableName, format)
   108  		_, err := w.Write([]byte(header + "\n"))
   109  		if err != nil {
   110  			log.Fatal(err)
   111  		}
   112  	case jsonExt:
   113  		prefix := "{\"Rows\":["
   114  		_, err := w.Write([]byte(prefix))
   115  		if err != nil {
   116  			log.Fatal(err)
   117  		}
   118  	default:
   119  		log.Fatalf("unable to write the header, unsupported format %v \n", format)
   120  	}
   121  }
   122  
   123  func formatRow(strs []string, cols []*SeedColumn, currentRowIdx, lastRowIdx int, tableName, format string) string {
   124  	switch format {
   125  	case csvExt:
   126  		return strings.Join(strs, ",") + "\n"
   127  	case sqlExt:
   128  		return getSQLRow(strs, cols, tableName) + "\n"
   129  	case jsonExt:
   130  		var suffix string
   131  		if currentRowIdx == lastRowIdx {
   132  			suffix = "\n"
   133  		} else {
   134  			suffix = ",\n"
   135  		}
   136  		return getJSONRow(strs, cols) + suffix
   137  	default:
   138  		log.Fatalf("cannot format row, unsupported file format %s \n", format)
   139  	}
   140  	return ""
   141  }
   142  
   143  func makeCSVHeaderStr(cols []*SeedColumn, tableName, format string) string {
   144  	str := make([]string, 0, len(cols))
   145  	for _, col := range cols {
   146  		str = append(str, col.Name)
   147  	}
   148  	return formatRow(str, cols, 0, 1, tableName, format)
   149  }