github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/performance/benchmarks/seed_schema.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package main
    16  
    17  import (
    18  	"fmt"
    19  	"log"
    20  	"strings"
    21  
    22  	"github.com/dolthub/dolt/go/store/types"
    23  )
    24  
    25  const (
    26  	csvExt  = ".csv"
    27  	jsonExt = ".json"
    28  	sqlExt  = ".sql"
    29  
    30  	increment = GenType("increment")
    31  	random    = GenType("random")
    32  	supplied  = GenType("supplied")
    33  )
    34  
    35  var supportedFormats = []string{csvExt, jsonExt, sqlExt}
    36  
    37  // GenType specifies how to generate subsequent row values for a given SeedColumn, for a test dataset
    38  type GenType string
    39  
// SeedSchema contains the schema to be used to generate a test Dataset.
// Use NewSeedSchema to build one; it rejects file formats that are not listed
// in supportedFormats.
type SeedSchema struct {
	// Rows is the size of the Dataset
	Rows int

	// Columns are the schema for the columns to be used for the Dataset
	Columns []*SeedColumn

	// FileFormatExt is the file format extension that directs how to construct the Dataset
	// as a string or as bytes. When set through NewSeedSchema it is one of
	// csvExt, jsonExt or sqlExt.
	FileFormatExt string
}
    52  
    53  // NewSeedSchema creates a new SeedSchema
    54  func NewSeedSchema(rows int, cols []*SeedColumn, format string) *SeedSchema {
    55  	for _, frmt := range supportedFormats {
    56  		if format == frmt {
    57  			return &SeedSchema{
    58  				Rows:          rows,
    59  				Columns:       cols,
    60  				FileFormatExt: format,
    61  			}
    62  		}
    63  	}
    64  	log.Fatalf("cannot build seed schema with unsupported file format %s \n", format)
    65  	return &SeedSchema{}
    66  }
    67  
    68  // Bytes returns a byte slice formatted according to the SeedSchema'a FileFormatExt
    69  func (sch *SeedSchema) Bytes() []byte {
    70  	switch sch.FileFormatExt {
    71  	case jsonExt:
    72  		return getColSchemaJSON(sch.Columns)
    73  	default:
    74  		log.Fatalf("cannot create bytes from schema, unsupported format %s \n", sch.FileFormatExt)
    75  	}
    76  	return []byte{}
    77  }
    78  
// SeedColumn is used to create a column in a test dataset for benchmark testing.
// Use NewSeedColumn to build one; it checks that GenType is allowed for Type.
type SeedColumn struct {
	// Name is the column name; getColSchemaJSON writes it as the "name" field.
	Name string
	// PrimaryKey marks the column as part of the primary key; getColSchemaJSON
	// writes it as "is_part_of_pk" and adds a not_null constraint when it is true.
	PrimaryKey bool
	// Type is the noms kind of the column; its lower-cased String() form is
	// written as the "kind" field by getColSchemaJSON.
	Type types.NomsKind
	// GenType specifies how subsequent row values are generated for this column.
	GenType GenType
}
    86  
    87  // NewSeedColumn creates a new SeedColumn
    88  func NewSeedColumn(name string, pk bool, t types.NomsKind, g GenType) *SeedColumn {
    89  	if isValidGenType(t, g) {
    90  		return &SeedColumn{
    91  			Name:       name,
    92  			PrimaryKey: pk,
    93  			Type:       t,
    94  			GenType:    g,
    95  		}
    96  	}
    97  	log.Fatalf("cannot use gen type %s with noms type %s \n", g, t.String())
    98  	return &SeedColumn{}
    99  }
   100  
   101  func isValidGenType(t types.NomsKind, g GenType) bool {
   102  	var validTypes []types.NomsKind
   103  	switch g {
   104  	case increment:
   105  		validTypes = []types.NomsKind{types.IntKind}
   106  	case random:
   107  		validTypes = []types.NomsKind{types.IntKind, types.StringKind}
   108  	case supplied:
   109  		validTypes = []types.NomsKind{
   110  			types.IntKind,
   111  			types.StringKind,
   112  			types.TimestampKind,
   113  		}
   114  	default:
   115  		log.Fatalf("unsupported gen type %s \n", g)
   116  	}
   117  	for _, v := range validTypes {
   118  		if t == v {
   119  			return true
   120  		}
   121  	}
   122  	return false
   123  }
   124  
   125  func getColSchemaJSON(seedCols []*SeedColumn) []byte {
   126  	prefix := "{\"Columns\":["
   127  	suffix := "]}"
   128  
   129  	statement := make([]string, 0)
   130  	statement = append(statement, prefix)
   131  
   132  	schemaStr := "{\"tag\": %d,\"name\":\"%s\",\"kind\":\"%s\",\"is_part_of_pk\":%v,\"col_constraints\":%s}"
   133  	jsonCols := make([]string, 0)
   134  
   135  	for i, sc := range seedCols {
   136  		var pks []string
   137  		if sc.PrimaryKey {
   138  			pks = []string{"{\"constraint_type\": \"not_null\",\"params\": null}"}
   139  		} else {
   140  			pks = []string{}
   141  		}
   142  		jc := fmt.Sprintf(schemaStr, uint64(i), sc.Name, strings.ToLower(sc.Type.String()), sc.PrimaryKey, pks)
   143  		jsonCols = append(jsonCols, jc)
   144  	}
   145  
   146  	statement = append(statement, strings.Join(jsonCols, ","))
   147  	statement = append(statement, suffix)
   148  	return []byte(strings.Join(statement, ""))
   149  }
   150  
   151  func genSampleCols() []*SeedColumn {
   152  	return []*SeedColumn{
   153  		NewSeedColumn("id", true, types.IntKind, increment),
   154  		NewSeedColumn("int1", false, types.IntKind, random),
   155  		NewSeedColumn("int2", false, types.IntKind, increment),
   156  		NewSeedColumn("int3", false, types.IntKind, random),
   157  		NewSeedColumn("int4", false, types.IntKind, increment),
   158  		NewSeedColumn("int5", false, types.IntKind, increment),
   159  		NewSeedColumn("str1", false, types.StringKind, random),
   160  		NewSeedColumn("str2", false, types.StringKind, random),
   161  		NewSeedColumn("str3", false, types.StringKind, random),
   162  		NewSeedColumn("str4", false, types.StringKind, random),
   163  		NewSeedColumn("str5", false, types.StringKind, random),
   164  	}
   165  }