github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/csv.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package workload

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"strings"
	"unsafe"

	"github.com/cockroachdb/cockroach/pkg/col/coldata"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/bufalloc"
	"github.com/cockroachdb/cockroach/pkg/util/encoding/csv"
	"github.com/cockroachdb/errors"
	"github.com/spf13/pflag"
)

const (
	rowStartParam = `row-start`
	rowEndParam   = `row-end`
)

// WriteCSVRows writes the specified table rows as CSV. If sizeBytesLimit is
// > 0, it is used as an approximate upper bound for how much to write. The
// next rowStart is returned (i.e. the last row written + 1).
func WriteCSVRows(
	ctx context.Context, w io.Writer, table Table, rowStart, rowEnd int, sizeBytesLimit int64,
) (rowBatchIdx int, err error) {
	cb := coldata.NewMemBatchWithSize(nil /* types */, 0 /* size */, coldata.StandardColumnFactory)
	var a bufalloc.ByteAllocator

	bytesWrittenW := &bytesWrittenWriter{w: w}
	csvW := csv.NewWriter(bytesWrittenW)
	var rowStrings []string
	for rowBatchIdx = rowStart; rowBatchIdx < rowEnd; rowBatchIdx++ {
		if sizeBytesLimit > 0 && bytesWrittenW.written > sizeBytesLimit {
			break
		}

		select {
		case <-ctx.Done():
			return 0, ctx.Err()
		default:
		}
		a = a[:0]
		table.InitialRows.FillBatch(rowBatchIdx, cb, &a)
		if numCols := cb.Width(); cap(rowStrings) < numCols {
			rowStrings = make([]string, numCols)
		} else {
			rowStrings = rowStrings[:numCols]
		}
		for rowIdx, numRows := 0, cb.Length(); rowIdx < numRows; rowIdx++ {
			for colIdx, col := range cb.ColVecs() {
				rowStrings[colIdx] = colDatumToCSVString(col, rowIdx)
			}
			if err := csvW.Write(rowStrings); err != nil {
				return 0, err
			}
		}
	}
	csvW.Flush()
	return rowBatchIdx, csvW.Error()
}
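
// writeTableCSVExample is a hypothetical usage sketch, not part of the
// original file: it drives WriteCSVRows in size-limited chunks, resuming from
// the returned batch index until the whole table has been written. The 16 MiB
// chunk size is an arbitrary assumption.
func writeTableCSVExample(ctx context.Context, w io.Writer, table Table) error {
	next := 0
	for next < table.InitialRows.NumBatches {
		var err error
		// Each call writes roughly 16 MiB (always at least one batch), then
		// reports where to resume.
		next, err = WriteCSVRows(ctx, w, table, next, table.InitialRows.NumBatches, 16<<20 /* sizeBytesLimit */)
		if err != nil {
			return err
		}
	}
	return nil
}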

// csvRowsReader streams a table's initial rows as CSV via the io.Reader
// interface, materializing one batch at a time into an internal buffer.
type csvRowsReader struct {
	t                    Table
	batchStart, batchEnd int

	buf  bytes.Buffer
	csvW *csv.Writer

	batchIdx int
	cb       coldata.Batch
	a        bufalloc.ByteAllocator

	stringsBuf []string
}

// Read implements io.Reader. It drains the internal buffer first, refilling
// it from the next batch of the table as needed, and returns io.EOF once
// batchEnd is reached.
func (r *csvRowsReader) Read(p []byte) (n int, err error) {
	if r.cb == nil {
		r.cb = coldata.NewMemBatchWithSize(nil /* types */, 0 /* size */, coldata.StandardColumnFactory)
	}

	for {
		if r.buf.Len() > 0 {
			return r.buf.Read(p)
		}
		r.buf.Reset()
		if r.batchIdx == r.batchEnd {
			return 0, io.EOF
		}
		r.a = r.a[:0]
		r.t.InitialRows.FillBatch(r.batchIdx, r.cb, &r.a)
		r.batchIdx++
		if numCols := r.cb.Width(); cap(r.stringsBuf) < numCols {
			r.stringsBuf = make([]string, numCols)
		} else {
			r.stringsBuf = r.stringsBuf[:numCols]
		}
		for rowIdx, numRows := 0, r.cb.Length(); rowIdx < numRows; rowIdx++ {
			for colIdx, col := range r.cb.ColVecs() {
				r.stringsBuf[colIdx] = colDatumToCSVString(col, rowIdx)
			}
			if err := r.csvW.Write(r.stringsBuf); err != nil {
				return 0, err
			}
		}
		r.csvW.Flush()
	}
}

// NewCSVRowsReader returns an io.Reader that outputs the initial data of the
// given table as CSV. If batchEnd is the zero-value it defaults to the end of
// the table.
func NewCSVRowsReader(t Table, batchStart, batchEnd int) io.Reader {
	if batchEnd == 0 {
		batchEnd = t.InitialRows.NumBatches
	}
	r := &csvRowsReader{t: t, batchStart: batchStart, batchEnd: batchEnd, batchIdx: batchStart}
	r.csvW = csv.NewWriter(&r.buf)
	return r
}
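
// copyTableCSVExample is a hypothetical usage sketch, not part of the
// original file: stream an entire table as CSV through any io.Writer (a
// file, an HTTP response, a compression pipeline) without buffering it all
// in memory.
func copyTableCSVExample(w io.Writer, table Table) (int64, error) {
	// A batchEnd of 0 means "through the last batch of the table".
	r := NewCSVRowsReader(table, 0 /* batchStart */, 0 /* batchEnd */)
	return io.Copy(w, r)
}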

// colDatumToCSVString renders the datum at rowIdx in col as its CSV field
// representation, using `NULL` for SQL NULLs.
func colDatumToCSVString(col coldata.Vec, rowIdx int) string {
	if col.Nulls().NullAt(rowIdx) {
		return `NULL`
	}
	switch col.CanonicalTypeFamily() {
	case types.BoolFamily:
		return strconv.FormatBool(col.Bool()[rowIdx])
	case types.IntFamily:
		return strconv.FormatInt(col.Int64()[rowIdx], 10)
	case types.FloatFamily:
		return strconv.FormatFloat(col.Float64()[rowIdx], 'f', -1, 64)
	case types.BytesFamily:
		// See the HACK comment in ColBatchToRows. The unsafe cast
		// reinterprets the backing bytes as a string without copying; the
		// string is only read before the batch is refilled.
		bytes := col.Bytes().Get(rowIdx)
		return *(*string)(unsafe.Pointer(&bytes))
	}
	panic(fmt.Sprintf(`unhandled type %s`, col.Type()))
}

// HandleCSV configures a Generator with URL params and outputs the data for a
// single Table as a CSV (optionally limiting the rows via `row-start` and
// `row-end` params). It is intended for use in implementing a
// `net/http.Handler`.
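//
// For example (hypothetical values), with the handler mounted under a
// `/csv/bank/` prefix, a request like
//
//	GET /csv/bank/bank?rows=1000&row-start=0&row-end=10
//
// would configure the bank generator with --rows=1000 and emit batches
// [0, 10) of its `bank` table.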
func HandleCSV(w http.ResponseWriter, req *http.Request, prefix string, meta Meta) error {
	ctx := context.Background()
	if err := req.ParseForm(); err != nil {
		return err
	}

	gen := meta.New()
	if f, ok := gen.(Flagser); ok {
		var flags []string
		f.Flags().VisitAll(func(f *pflag.Flag) {
			if vals, ok := req.Form[f.Name]; ok {
				for _, val := range vals {
					flags = append(flags, fmt.Sprintf(`--%s=%s`, f.Name, val))
				}
			}
		})
		if err := f.Flags().Parse(flags); err != nil {
			return errors.Wrapf(err, `parsing parameters %s`, strings.Join(flags, ` `))
		}
	}

	tableName := strings.TrimPrefix(req.URL.Path, prefix)
	var table *Table
	for _, t := range gen.Tables() {
		if t.Name == tableName {
			table = &t
			break
		}
	}
	if table == nil {
		return errors.Errorf(`could not find table %s in generator %s`, tableName, meta.Name)
	}
	if table.InitialRows.FillBatch == nil {
		return errors.Errorf(`csv-server is not supported for workload %s`, meta.Name)
	}

	rowStart, rowEnd := 0, table.InitialRows.NumBatches
	if vals, ok := req.Form[rowStartParam]; ok && len(vals) > 0 {
		var err error
		rowStart, err = strconv.Atoi(vals[len(vals)-1])
		if err != nil {
			return errors.Wrapf(err, `parsing %s`, rowStartParam)
		}
	}
	if vals, ok := req.Form[rowEndParam]; ok && len(vals) > 0 {
		var err error
		rowEnd, err = strconv.Atoi(vals[len(vals)-1])
		if err != nil {
			return errors.Wrapf(err, `parsing %s`, rowEndParam)
		}
	}

	w.Header().Set(`Content-Type`, `text/csv`)
	_, err := WriteCSVRows(ctx, w, *table, rowStart, rowEnd, -1 /* sizeBytesLimit */)
	return err
}

// bytesWrittenWriter wraps an io.Writer and tracks the total number of bytes
// written through it.
type bytesWrittenWriter struct {
	w       io.Writer
	written int64
}

func (w *bytesWrittenWriter) Write(p []byte) (int, error) {
	n, err := w.w.Write(p)
	w.written += int64(n)
	return n, err
}

// CSVMux returns a mux over HTTP handlers for CSV data in all tables in the
// given generators.
func CSVMux(metas []Meta) *http.ServeMux {
	mux := http.NewServeMux()
	for _, meta := range metas {
		meta := meta
		prefix := fmt.Sprintf(`/csv/%s/`, meta.Name)
		mux.HandleFunc(prefix, func(w http.ResponseWriter, req *http.Request) {
			if err := HandleCSV(w, req, prefix, meta); err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
			}
		})
	}
	return mux
}
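
// serveCSVExample is a hypothetical usage sketch, not part of the original
// file: serve the CSV endpoints for a set of generators over HTTP. The
// listen address is an assumption; substitute whatever the deployment uses.
func serveCSVExample(metas []Meta) error {
	return http.ListenAndServe(`:8081`, CSVMux(metas))
}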