github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/dbs/cmd/benchdb/benchfilesort/main.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

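// Command benchfilesort benchmarks the filesort package.
//
// It supports two subcommands:
//
//	gen  generate synthetic rows and write them to data.out in the target dir
//	run  load the generated rows and benchmark FileSorter Input/Output/Close
//
// Example invocations (the flag values shown are the defaults registered in init):
//
//	benchfilesort gen -dir . -keySize 8 -valSize 8 -scale 100
//	benchfilesort run -dir . -bufSize 500000 -nWorkers 1 -inputRatio 100 -outputRatio 100
//
// See benchfilesort/README for more information.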
package main

import (
	"encoding/binary"
	"flag"
	"fmt"
	"io/ioutil"
	"math/rand"
	"os"
	"path/filepath"
	"runtime/pprof"
	"time"

	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/log"
	"github.com/whtcorpsinc/milevadb/soliton/codec"
	"github.com/whtcorpsinc/milevadb/soliton/filesort"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx"
	"github.com/whtcorpsinc/milevadb/types"
)

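// comparableRow is one synthetic event (row): key Causets used for ordering,
// value Causets, and an int64 handle.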
type comparableRow struct {
	key    []types.Causet
	val    []types.Causet
	handle int64
}

var (
	genCmd = flag.NewFlagSet("gen", flag.ExitOnError)
	runCmd = flag.NewFlagSet("run", flag.ExitOnError)

	logLevel    = "warn"
	cpuprofile  string
	tmFIDelir   string
	keySize     int
	valSize     int
	bufSize     int
	scale       int
	nWorkers    int
	inputRatio  int
	outputRatio int
)

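// nextRow generates a pseudo-random row with keySize key columns, valSize
// value columns and a random handle, using the given rand.Rand source.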
func nextRow(r *rand.Rand, keySize int, valSize int) *comparableRow {
	key := make([]types.Causet, keySize)
	for i := range key {
		key[i] = types.NewCauset(r.Int())
	}

	val := make([]types.Causet, valSize)
	for j := range val {
		val[j] = types.NewCauset(r.Int())
	}

	handle := r.Int63()
	return &comparableRow{key: key, val: val, handle: handle}
}

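// encodeRow appends one length-prefixed row to b: the key, value and handle
// Causets are encoded with codec.EncodeKey, and the encoded body is preceded
// by its length as a big-endian uint64.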
func encodeRow(b []byte, event *comparableRow) ([]byte, error) {
	var (
		err  error
		head = make([]byte, 8)
		body []byte
	)
	sc := &stmtctx.StatementContext{TimeZone: time.Local}
	body, err = codec.EncodeKey(sc, body, event.key...)
	if err != nil {
		return b, errors.Trace(err)
	}
	body, err = codec.EncodeKey(sc, body, event.val...)
	if err != nil {
		return b, errors.Trace(err)
	}
	body, err = codec.EncodeKey(sc, body, types.NewIntCauset(event.handle))
	if err != nil {
		return b, errors.Trace(err)
	}

	binary.BigEndian.PutUint64(head, uint64(len(body)))

	b = append(b, head...)
	b = append(b, body...)

	return b, nil
}

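// decodeRow reads one length-prefixed row from fd: an 8-byte big-endian
// length header followed by the encoded key, value and handle Causets.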
func decodeRow(fd *os.File) (*comparableRow, error) {
	var (
		err  error
		n    int
		head = make([]byte, 8)
		dcod = make([]types.Causet, 0, keySize+valSize+1)
	)

	n, err = fd.Read(head)
	if n != 8 {
		return nil, errors.New("incorrect header")
	}
	if err != nil {
		return nil, errors.Trace(err)
	}

	rowSize := int(binary.BigEndian.Uint64(head))
	rowBytes := make([]byte, rowSize)

	n, err = fd.Read(rowBytes)
	if n != rowSize {
		return nil, errors.New("incorrect event")
	}
	if err != nil {
		return nil, errors.Trace(err)
	}

	dcod, err = codec.Decode(rowBytes, keySize+valSize+1)
	if err != nil {
		return nil, errors.Trace(err)
	}

	return &comparableRow{
		key:    dcod[:keySize],
		val:    dcod[keySize : keySize+valSize],
		handle: dcod[keySize+valSize:][0].GetInt64(),
	}, nil
}

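// encodeMeta appends the 24-byte spacetime header to b: the scale (row
// count), the key size and the value size, each as a big-endian uint64.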
func encodeMeta(b []byte, scale int, keySize int, valSize int) []byte {
	spacetime := make([]byte, 8)

	binary.BigEndian.PutUint64(spacetime, uint64(scale))
	b = append(b, spacetime...)
	binary.BigEndian.PutUint64(spacetime, uint64(keySize))
	b = append(b, spacetime...)
	binary.BigEndian.PutUint64(spacetime, uint64(valSize))
	b = append(b, spacetime...)

	return b
}

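// decodeMeta reads the 24-byte spacetime header from fd and stores the
// decoded scale, keySize and valSize in the package-level variables,
// validating that each is positive.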
func decodeMeta(fd *os.File) error {
	spacetime := make([]byte, 24)
	if n, err := fd.Read(spacetime); err != nil || n != 24 {
		if n != 24 {
			return errors.New("incorrect spacetime data")
		}
		return errors.Trace(err)
	}

	scale = int(binary.BigEndian.Uint64(spacetime[:8]))
	if scale <= 0 {
		return errors.New("number of rows must be positive")
	}

	keySize = int(binary.BigEndian.Uint64(spacetime[8:16]))
	if keySize <= 0 {
		return errors.New("key size must be positive")
	}

	valSize = int(binary.BigEndian.Uint64(spacetime[16:]))
	if valSize <= 0 {
		return errors.New("value size must be positive")
	}

	return nil
}

/*
 * The synthetic data is exported in a binary format.
 * The encoding format is:
 *   1) Meta Data
 *      Three 64-bit integers representing the scale (number of rows), the
 *      key size and the value size.
 *   2) Row Data
 *      Each event is encoded as one 64-bit integer representing the event
 *      size in bytes, followed by the actual event bytes.
 */
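// For example, a file generated with scale=2, keySize=1 and valSize=1 starts
// with 24 bytes of spacetime (the big-endian uint64 values 2, 1 and 1),
// followed by two length-prefixed events; each event's 8-byte big-endian
// length header is followed by a body whose size depends on how
// codec.EncodeKey encodes the generated Causets.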
func export() error {
	var outputBytes []byte

	fileName := filepath.Join(tmFIDelir, "data.out")
	outputFile, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return errors.Trace(err)
	}
	defer terror.Call(outputFile.Close)

	outputBytes = encodeMeta(outputBytes, scale, keySize, valSize)

	seed := rand.NewSource(time.Now().UnixNano())
	r := rand.New(seed)

	for i := 1; i <= scale; i++ {
		outputBytes, err = encodeRow(outputBytes, nextRow(r, keySize, valSize))
		if err != nil {
			return errors.Trace(err)
		}
		_, err = outputFile.Write(outputBytes)
		if err != nil {
			return errors.Trace(err)
		}
		outputBytes = outputBytes[:0]
	}

	return nil
}

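// load reads the spacetime header from data.out and then decodes ratio
// percent of the generated rows into memory.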
func load(ratio int) ([]*comparableRow, error) {
	var (
		err error
		fd  *os.File
	)

	fileName := filepath.Join(tmFIDelir, "data.out")
	fd, err = os.Open(fileName)
	if os.IsNotExist(err) {
		return nil, errors.New("data file (data.out) does not exist")
	}
	if err != nil {
		return nil, errors.Trace(err)
	}
	defer terror.Call(fd.Close)

	err = decodeMeta(fd)
	if err != nil {
		return nil, errors.Trace(err)
	}

	cLogf("\tnumber of rows = %d, key size = %d, value size = %d", scale, keySize, valSize)

	var (
		event *comparableRow
		rows  = make([]*comparableRow, 0, scale)
	)

	totalRows := int(float64(scale) * (float64(ratio) / 100.0))
	cLogf("\tload %d rows", totalRows)
	for i := 1; i <= totalRows; i++ {
		event, err = decodeRow(fd)
		if err != nil {
			return nil, errors.Trace(err)
		}
		rows = append(rows, event)
	}

	return rows, nil
}

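// driveGenCmd parses the gen subcommand flags, validates them and exports
// the synthetic data set to data.out.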
func driveGenCmd() {
	err := genCmd.Parse(os.Args[2:])
	terror.MustNil(err)
	// Sanity checks
	if keySize <= 0 {
		log.Fatal("key size must be positive")
	}
	if valSize <= 0 {
		log.Fatal("value size must be positive")
	}
	if scale <= 0 {
		log.Fatal("scale must be positive")
	}
	if _, err = os.Stat(tmFIDelir); err != nil {
		if os.IsNotExist(err) {
			log.Fatal("tmFIDelir does not exist")
		}
		log.Fatal(err.Error())
	}

	cLog("Generating...")
	start := time.Now()
	err = export()
	terror.MustNil(err)
	cLog("Done!")
	cLogf("Data placed in: %s", filepath.Join(tmFIDelir, "data.out"))
	cLog("Time used: ", time.Since(start))
	cLog("=================================")
}

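// driveRunCmd parses the run subcommand flags, loads the generated rows,
// feeds them to a filesort.FileSorter and then drains the requested share of
// the sorted output, optionally recording a CPU profile.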
func driveRunCmd() {
	err := runCmd.Parse(os.Args[2:])
	terror.MustNil(err)
	// Sanity checks
	if bufSize <= 0 {
		log.Fatal("buffer size must be positive")
	}
	if nWorkers <= 0 {
		log.Fatal("the number of workers must be positive")
	}
	if inputRatio < 0 || inputRatio > 100 {
		log.Fatal("input ratio must be between 0 and 100 (inclusive)")
	}
	if outputRatio < 0 || outputRatio > 100 {
		log.Fatal("output ratio must be between 0 and 100 (inclusive)")
	}
	if _, err = os.Stat(tmFIDelir); err != nil {
		if os.IsNotExist(err) {
			log.Fatal("tmFIDelir does not exist")
		}
		terror.MustNil(err)
	}

	var (
		dir     string
		profile *os.File
		fs      *filesort.FileSorter
	)
	cLog("Loading...")
	start := time.Now()
	data, err := load(inputRatio)
	terror.MustNil(err)
	cLog("Done!")
	cLogf("Loaded %d rows", len(data))
	cLog("Time used: ", time.Since(start))
	cLog("=================================")

	sc := new(stmtctx.StatementContext)
	fsBuilder := new(filesort.Builder)
	byDesc := make([]bool, keySize)
	for i := 0; i < keySize; i++ {
		byDesc[i] = false
	}
	dir, err = ioutil.TempDir(tmFIDelir, "benchfilesort_test")
	terror.MustNil(err)
	fs, err = fsBuilder.SetSC(sc).SetSchema(keySize, valSize).SetBuf(bufSize).SetWorkers(nWorkers).SetDesc(byDesc).SetDir(dir).Build()
	terror.MustNil(err)

	if cpuprofile != "" {
		profile, err = os.Create(cpuprofile)
		terror.MustNil(err)
	}

	cLog("Inputting...")
	start = time.Now()
	for _, r := range data {
		err = fs.Input(r.key, r.val, r.handle)
		terror.MustNil(err)
	}
	cLog("Done!")
	cLogf("Input %d rows", len(data))
	cLog("Time used: ", time.Since(start))
	cLog("=================================")

	cLog("Outputting...")
	totalRows := int(float64(len(data)) * (float64(outputRatio) / 100.0))
	start = time.Now()
	if cpuprofile != "" {
		err = pprof.StartCPUProfile(profile)
		terror.MustNil(err)
	}
	for i := 0; i < totalRows; i++ {
		_, _, _, err = fs.Output()
		terror.MustNil(err)
	}
	if cpuprofile != "" {
		pprof.StopCPUProfile()
	}
	cLog("Done!")
	cLogf("Output %d rows", totalRows)
	cLog("Time used: ", time.Since(start))
	cLog("=================================")

	cLog("Closing...")
	start = time.Now()
	err = fs.Close()
	terror.MustNil(err)
	cLog("Done!")
	cLog("Time used: ", time.Since(start))
	cLog("=================================")
}

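// init configures the zap logger and registers the command-line flags for
// the gen and run subcommands.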
func init() {
	err := logutil.InitZapLogger(logutil.NewLogConfig(logLevel, logutil.DefaultLogFormat, "", logutil.EmptyFileLogConfig, false))
	terror.MustNil(err)
	cwd, err1 := os.Getwd()
	terror.MustNil(err1)

	genCmd.StringVar(&tmFIDelir, "dir", cwd, "where to store the generated rows")
	genCmd.IntVar(&keySize, "keySize", 8, "the size of the key")
	genCmd.IntVar(&valSize, "valSize", 8, "the size of the value")
	genCmd.IntVar(&scale, "scale", 100, "how many rows to generate")
	genCmd.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file")

	runCmd.StringVar(&tmFIDelir, "dir", cwd, "where to load the generated rows from")
	runCmd.IntVar(&bufSize, "bufSize", 500000, "how many rows are held in memory at a time")
	runCmd.IntVar(&nWorkers, "nWorkers", 1, "how many workers are used in async sorting")
	runCmd.IntVar(&inputRatio, "inputRatio", 100, "input percentage")
	runCmd.IntVar(&outputRatio, "outputRatio", 100, "output percentage")
	runCmd.StringVar(&cpuprofile, "cpuprofile", "", "write cpu profile to file")
}

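// main prints usage information when no subcommand is given, and otherwise
// dispatches to driveGenCmd or driveRunCmd based on os.Args[1].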
func main() {
	flag.Parse()

	if len(os.Args) == 1 {
		fmt.Printf("Usage:\n\n")
		fmt.Printf("\tbenchfilesort command [arguments]\n\n")
		fmt.Printf("The commands are:\n\n")
		fmt.Println("\tgen\t", "generate rows")
		fmt.Println("\trun\t", "run tests")
		fmt.Println("")
		fmt.Println("Check out benchfilesort/README for more information.")
		return
	}

	switch os.Args[1] {
	case "gen":
		driveGenCmd()
	case "run":
		driveRunCmd()
	default:
		fmt.Printf("%q is not a valid command.\n", os.Args[1])
		os.Exit(2)
	}
}

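// cLogf prints the formatted message to stdout wrapped in ANSI green escape
// codes so that benchmark progress stands out from regular log output.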
func cLogf(format string, args ...interface{}) {
	str := fmt.Sprintf(format, args...)
	fmt.Println("\033[0;32m" + str + "\033[0m")
}

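// cLog is like cLogf but formats its arguments with fmt.Sprint.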
func cLog(args ...interface{}) {
	str := fmt.Sprint(args...)
	fmt.Println("\033[0;32m" + str + "\033[0m")
}