github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/querybench/query_bench.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package querybench
    12  
    13  import (
    14  	"bufio"
    15  	"context"
    16  	gosql "database/sql"
    17  	"fmt"
    18  	"os"
    19  	"strings"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    22  	"github.com/cockroachdb/cockroach/pkg/workload"
    23  	"github.com/cockroachdb/cockroach/pkg/workload/histogram"
    24  	"github.com/cockroachdb/errors"
    25  	"github.com/spf13/pflag"
    26  )
    27  
// queryBench is the workload generator behind the `querybench` workload. Its
// flag-backed fields are bound to command-line flags in queryBenchMeta.New.
type queryBench struct {
	// flags holds the flag set and per-flag metadata for this workload.
	flags workload.Flags
	// connFlags is created by workload.NewConnFlags; Ops reads DBOverride and
	// Concurrency from it.
	connFlags *workload.ConnFlags
	// queryFile is the value of --query-file: the path handed to GetQueries.
	queryFile string
	// numRunsPerQuery is the value of --num-runs. Zero (the default) means no
	// limit is derived from it; negative values are rejected by the Validate
	// hook.
	numRunsPerQuery int
	// vectorize is the value of --vectorize. When non-empty, Ops issues
	// `SET vectorize=<value>` before building the workers.
	vectorize string
	// verbose is the value of --verbose. It is copied into every worker, where
	// it selects per-query histograms over a single unnamed one.
	verbose bool

	// queries is populated by the Validate hook with the contents of queryFile.
	queries []string
}
    38  
// init hands queryBenchMeta to workload.Register when the package is loaded.
func init() {
	workload.Register(queryBenchMeta)
}
    42  
    43  var queryBenchMeta = workload.Meta{
    44  	Name: `querybench`,
    45  	Description: `QueryBench runs queries from the specified file. The queries are run ` +
    46  		`sequentially in each concurrent worker.`,
    47  	Version: `1.0.0`,
    48  	New: func() workload.Generator {
    49  		g := &queryBench{}
    50  		g.flags.FlagSet = pflag.NewFlagSet(`querybench`, pflag.ContinueOnError)
    51  		g.flags.Meta = map[string]workload.FlagMeta{
    52  			`query-file`: {RuntimeOnly: true},
    53  			`optimizer`:  {RuntimeOnly: true},
    54  			`vectorize`:  {RuntimeOnly: true},
    55  			`num-runs`:   {RuntimeOnly: true},
    56  		}
    57  		g.flags.StringVar(&g.queryFile, `query-file`, ``, `File of newline separated queries to run`)
    58  		g.flags.IntVar(&g.numRunsPerQuery, `num-runs`, 0, `Specifies the number of times each query in the query file to be run `+
    59  			`(note that --duration and --max-ops take precedence, so if duration or max-ops is reached, querybench will exit without honoring --num-runs)`)
    60  		g.flags.StringVar(&g.vectorize, `vectorize`, "", `Set vectorize session variable`)
    61  		g.flags.BoolVar(&g.verbose, `verbose`, true, `Prints out the queries being run as well as histograms`)
    62  		g.connFlags = workload.NewConnFlags(&g.flags)
    63  		return g
    64  	},
    65  }
    66  
// vectorizeSetting19_2Translation is a mapping from the 20.1+ vectorize session
// variable value to the 19.2 syntax. Ops consults it only after the server
// rejects the 20.1+ value with an "invalid value" error; values with no entry
// here have no fallback.
var vectorizeSetting19_2Translation = map[string]string{
	"on": "experimental_on",
}
    72  
// Meta implements the Generator interface. It returns the package-level
// queryBenchMeta and does not read the receiver.
func (*queryBench) Meta() workload.Meta { return queryBenchMeta }
    75  
// Flags implements the Flagser interface. It returns the flags value that
// queryBenchMeta.New populated for this generator.
func (g *queryBench) Flags() workload.Flags { return g.flags }
    78  
    79  // Hooks implements the Hookser interface.
    80  func (g *queryBench) Hooks() workload.Hooks {
    81  	return workload.Hooks{
    82  		Validate: func() error {
    83  			if g.queryFile == "" {
    84  				return errors.Errorf("Missing required argument '--query-file'")
    85  			}
    86  			queries, err := GetQueries(g.queryFile)
    87  			if err != nil {
    88  				return err
    89  			}
    90  			if len(queries) < 1 {
    91  				return errors.New("no queries found in file")
    92  			}
    93  			g.queries = queries
    94  			if g.numRunsPerQuery < 0 {
    95  				return errors.New("negative --num-runs specified")
    96  			}
    97  			return nil
    98  		},
    99  	}
   100  }
   101  
// Tables implements the Generator interface. querybench declares no tables of
// its own, so the result is always an empty (non-nil) slice.
func (*queryBench) Tables() []workload.Table {
	// Assume the necessary tables are already present.
	return []workload.Table{}
}
   107  
   108  // Ops implements the Opser interface.
   109  func (g *queryBench) Ops(urls []string, reg *histogram.Registry) (workload.QueryLoad, error) {
   110  	sqlDatabase, err := workload.SanitizeUrls(g, g.connFlags.DBOverride, urls)
   111  	if err != nil {
   112  		return workload.QueryLoad{}, err
   113  	}
   114  	db, err := gosql.Open(`cockroach`, strings.Join(urls, ` `))
   115  	if err != nil {
   116  		return workload.QueryLoad{}, err
   117  	}
   118  	// Allow a maximum of concurrency+1 connections to the database.
   119  	db.SetMaxOpenConns(g.connFlags.Concurrency + 1)
   120  	db.SetMaxIdleConns(g.connFlags.Concurrency + 1)
   121  
   122  	if g.vectorize != "" {
   123  		_, err := db.Exec("SET vectorize=" + g.vectorize)
   124  		if err != nil && strings.Contains(err.Error(), "invalid value") {
   125  			if _, ok := vectorizeSetting19_2Translation[g.vectorize]; ok {
   126  				// Fall back to using the pre-20.1 vectorize options.
   127  				_, err = db.Exec("SET vectorize=" + vectorizeSetting19_2Translation[g.vectorize])
   128  			}
   129  		}
   130  		if err != nil {
   131  			return workload.QueryLoad{}, err
   132  		}
   133  	}
   134  
   135  	stmts := make([]namedStmt, len(g.queries))
   136  	for i, query := range g.queries {
   137  		stmts[i] = namedStmt{
   138  			// TODO(solon): Allow specifying names in the query file rather than using
   139  			// the entire query as the name.
   140  			name: fmt.Sprintf("%2d: %s", i+1, query),
   141  		}
   142  		stmt, err := db.Prepare(query)
   143  		if err != nil {
   144  			stmts[i].query = query
   145  			continue
   146  		}
   147  		stmts[i].preparedStmt = stmt
   148  	}
   149  
   150  	maxNumStmts := 0
   151  	if g.numRunsPerQuery > 0 {
   152  		maxNumStmts = g.numRunsPerQuery * len(g.queries)
   153  	}
   154  
   155  	ql := workload.QueryLoad{SQLDatabase: sqlDatabase}
   156  	for i := 0; i < g.connFlags.Concurrency; i++ {
   157  		op := queryBenchWorker{
   158  			hists:       reg.GetHandle(),
   159  			db:          db,
   160  			stmts:       stmts,
   161  			verbose:     g.verbose,
   162  			maxNumStmts: maxNumStmts,
   163  		}
   164  		ql.WorkerFns = append(ql.WorkerFns, op.run)
   165  	}
   166  	return ql, nil
   167  }
   168  
   169  // GetQueries returns the lines of a file as a string slice. Ignores lines
   170  // beginning with '#' or '--'.
   171  func GetQueries(path string) ([]string, error) {
   172  	file, err := os.Open(path)
   173  	if err != nil {
   174  		return nil, err
   175  	}
   176  	defer file.Close()
   177  
   178  	scanner := bufio.NewScanner(file)
   179  	// Read lines up to 1 MB in size.
   180  	scanner.Buffer(make([]byte, 64*1024), 1024*1024)
   181  	var lines []string
   182  	for scanner.Scan() {
   183  		line := scanner.Text()
   184  		if len(line) > 0 && line[0] != '#' && !strings.HasPrefix(line, "--") {
   185  			lines = append(lines, line)
   186  		}
   187  	}
   188  	if err := scanner.Err(); err != nil {
   189  		return nil, err
   190  	}
   191  	return lines, nil
   192  }
   193  
// namedStmt is one query from the query file together with the label used for
// its histogram. Ops fills in exactly one of preparedStmt and query.
type namedStmt struct {
	// name is the 1-based position of the query followed by its full text; the
	// worker uses it as the histogram key when verbose is set.
	name string
	// We will try to Prepare the statement, and if that succeeds, the prepared
	// statement will be stored in `preparedStmt', otherwise, we will store
	// plain query in 'query'.
	preparedStmt *gosql.Stmt
	query        string
}
   202  
// queryBenchWorker is the per-worker state behind one entry of
// QueryLoad.WorkerFns. Each call to run executes a single statement.
type queryBenchWorker struct {
	// hists receives the latency of every statement this worker completes.
	hists *histogram.Histograms
	// db runs statements that could not be prepared.
	db *gosql.DB
	// stmts is the full list of statements to cycle through.
	stmts []namedStmt

	// stmtIdx counts the statements this worker has started; taken modulo
	// len(stmts) it selects the next statement to run.
	stmtIdx int
	// verbose selects per-statement histograms (keyed by statement name) over a
	// single histogram keyed by the empty string.
	verbose bool

	// maxNumStmts indicates the maximum number of statements for the worker to
	// execute. It is non-zero only when --num-runs flag is specified for the
	// workload.
	maxNumStmts int
}
   216  
   217  func (o *queryBenchWorker) run(ctx context.Context) error {
   218  	if o.maxNumStmts > 0 {
   219  		if o.stmtIdx >= o.maxNumStmts {
   220  			// This worker has already reached the maximum number of statements to
   221  			// execute.
   222  			return nil
   223  		}
   224  	}
   225  	start := timeutil.Now()
   226  	stmt := o.stmts[o.stmtIdx%len(o.stmts)]
   227  	o.stmtIdx++
   228  
   229  	exhaustRows := func(execFn func() (*gosql.Rows, error)) error {
   230  		rows, err := execFn()
   231  		if err != nil {
   232  			return err
   233  		}
   234  		defer rows.Close()
   235  		for rows.Next() {
   236  		}
   237  		if err := rows.Err(); err != nil {
   238  			return err
   239  		}
   240  		return nil
   241  	}
   242  	if stmt.preparedStmt != nil {
   243  		if err := exhaustRows(func() (*gosql.Rows, error) {
   244  			return stmt.preparedStmt.Query()
   245  		}); err != nil {
   246  			return err
   247  		}
   248  	} else {
   249  		if err := exhaustRows(func() (*gosql.Rows, error) {
   250  			return o.db.Query(stmt.query)
   251  		}); err != nil {
   252  			return err
   253  		}
   254  	}
   255  	elapsed := timeutil.Since(start)
   256  	if o.verbose {
   257  		o.hists.Get(stmt.name).Record(elapsed)
   258  	} else {
   259  		o.hists.Get("").Record(elapsed)
   260  	}
   261  	return nil
   262  }