github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/workload/csv.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package workload

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"strings"
	"unsafe"

	"github.com/cockroachdb/cockroach/pkg/col/coldata"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util/bufalloc"
	"github.com/cockroachdb/cockroach/pkg/util/encoding/csv"
	"github.com/cockroachdb/errors"
	"github.com/spf13/pflag"
)

const (
	rowStartParam = `row-start`
	rowEndParam   = `row-end`
)

// WriteCSVRows writes the specified table rows as a csv. If sizeBytesLimit is
// > 0, it will be used as an approximate upper bound for how much to write.
// The next rowStart is returned (so last row written + 1).
func WriteCSVRows(
	ctx context.Context, w io.Writer, table Table, rowStart, rowEnd int, sizeBytesLimit int64,
) (rowBatchIdx int, err error) {
	cb := coldata.NewMemBatchWithSize(nil /* types */, 0 /* size */, coldata.StandardColumnFactory)
	var a bufalloc.ByteAllocator

	bytesWrittenW := &bytesWrittenWriter{w: w}
	csvW := csv.NewWriter(bytesWrittenW)
	var rowStrings []string
	for rowBatchIdx = rowStart; rowBatchIdx < rowEnd; rowBatchIdx++ {
		if sizeBytesLimit > 0 && bytesWrittenW.written > sizeBytesLimit {
			break
		}

		select {
		case <-ctx.Done():
			return 0, ctx.Err()
		default:
		}
		a = a[:0]
		table.InitialRows.FillBatch(rowBatchIdx, cb, &a)
		if numCols := cb.Width(); cap(rowStrings) < numCols {
			rowStrings = make([]string, numCols)
		} else {
			rowStrings = rowStrings[:numCols]
		}
		for rowIdx, numRows := 0, cb.Length(); rowIdx < numRows; rowIdx++ {
			for colIdx, col := range cb.ColVecs() {
				rowStrings[colIdx] = colDatumToCSVString(col, rowIdx)
			}
			if err := csvW.Write(rowStrings); err != nil {
				return 0, err
			}
		}
	}
	csvW.Flush()
	return rowBatchIdx, csvW.Error()
}

// csvRowsReader streams a table's initial rows as CSV, encoding one batch at a
// time into an internal buffer that callers drain via Read.
type csvRowsReader struct {
	t                    Table
	batchStart, batchEnd int

	buf  bytes.Buffer
	csvW *csv.Writer

	batchIdx int
	cb       coldata.Batch
	a        bufalloc.ByteAllocator

	stringsBuf []string
}

// Read implements io.Reader. It first drains any buffered CSV bytes, then
// encodes the next batch into the buffer, returning io.EOF once batchEnd has
// been reached.
func (r *csvRowsReader) Read(p []byte) (n int, err error) {
	if r.cb == nil {
		r.cb = coldata.NewMemBatchWithSize(nil /* types */, 0 /* size */, coldata.StandardColumnFactory)
	}

	for {
		if r.buf.Len() > 0 {
			return r.buf.Read(p)
		}
		r.buf.Reset()
		if r.batchIdx == r.batchEnd {
			return 0, io.EOF
		}
		r.a = r.a[:0]
		r.t.InitialRows.FillBatch(r.batchIdx, r.cb, &r.a)
		r.batchIdx++
		if numCols := r.cb.Width(); cap(r.stringsBuf) < numCols {
			r.stringsBuf = make([]string, numCols)
		} else {
			r.stringsBuf = r.stringsBuf[:numCols]
		}
		for rowIdx, numRows := 0, r.cb.Length(); rowIdx < numRows; rowIdx++ {
			for colIdx, col := range r.cb.ColVecs() {
				r.stringsBuf[colIdx] = colDatumToCSVString(col, rowIdx)
			}
			if err := r.csvW.Write(r.stringsBuf); err != nil {
				return 0, err
			}
		}
		r.csvW.Flush()
	}
}
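
// The sketch below is illustrative and not part of the original file: it shows
// one way WriteCSVRows might be driven, assuming a Generator whose tables fill
// their initial rows via FillBatch. The name writeTableCSVExample is
// hypothetical; passing -1 as sizeBytesLimit disables the size cap, so the
// entire batch range is written.
func writeTableCSVExample(gen Generator) (string, error) {
	table := gen.Tables()[0]
	var buf bytes.Buffer
	// The returned index is the next batch to write, which a caller could use
	// to resume a partial export.
	_, err := WriteCSVRows(
		context.Background(), &buf, table,
		0 /* rowStart */, table.InitialRows.NumBatches /* rowEnd */, -1, /* sizeBytesLimit */
	)
	return buf.String(), err
}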

// NewCSVRowsReader returns an io.Reader that outputs the initial data of the
// given table as CSVs. If batchEnd is the zero-value it defaults to the end of
// the table.
func NewCSVRowsReader(t Table, batchStart, batchEnd int) io.Reader {
	if batchEnd == 0 {
		batchEnd = t.InitialRows.NumBatches
	}
	r := &csvRowsReader{t: t, batchStart: batchStart, batchEnd: batchEnd, batchIdx: batchStart}
	r.csvW = csv.NewWriter(&r.buf)
	return r
}

func colDatumToCSVString(col coldata.Vec, rowIdx int) string {
	if col.Nulls().NullAt(rowIdx) {
		return `NULL`
	}
	switch col.CanonicalTypeFamily() {
	case types.BoolFamily:
		return strconv.FormatBool(col.Bool()[rowIdx])
	case types.IntFamily:
		return strconv.FormatInt(col.Int64()[rowIdx], 10)
	case types.FloatFamily:
		return strconv.FormatFloat(col.Float64()[rowIdx], 'f', -1, 64)
	case types.BytesFamily:
		// See the HACK comment in ColBatchToRows.
		bytes := col.Bytes().Get(rowIdx)
		return *(*string)(unsafe.Pointer(&bytes))
	}
	panic(fmt.Sprintf(`unhandled type %s`, col.Type()))
}

// HandleCSV configures a Generator with url params and outputs the data for a
// single Table as a CSV (optionally limiting the rows via `row-start` and
// `row-end` params). It is intended for use in implementing a
// `net/http.Handler`.
func HandleCSV(w http.ResponseWriter, req *http.Request, prefix string, meta Meta) error {
	ctx := context.Background()
	if err := req.ParseForm(); err != nil {
		return err
	}

	gen := meta.New()
	if f, ok := gen.(Flagser); ok {
		var flags []string
		f.Flags().VisitAll(func(f *pflag.Flag) {
			if vals, ok := req.Form[f.Name]; ok {
				for _, val := range vals {
					flags = append(flags, fmt.Sprintf(`--%s=%s`, f.Name, val))
				}
			}
		})
		if err := f.Flags().Parse(flags); err != nil {
			return errors.Wrapf(err, `parsing parameters %s`, strings.Join(flags, ` `))
		}
	}

	tableName := strings.TrimPrefix(req.URL.Path, prefix)
	var table *Table
	for _, t := range gen.Tables() {
		if t.Name == tableName {
			table = &t
			break
		}
	}
	if table == nil {
		return errors.Errorf(`could not find table %s in generator %s`, tableName, meta.Name)
	}
	if table.InitialRows.FillBatch == nil {
		return errors.Errorf(`csv-server is not supported for workload %s`, meta.Name)
	}

	rowStart, rowEnd := 0, table.InitialRows.NumBatches
	if vals, ok := req.Form[rowStartParam]; ok && len(vals) > 0 {
		var err error
		rowStart, err = strconv.Atoi(vals[len(vals)-1])
		if err != nil {
			return errors.Wrapf(err, `parsing %s`, rowStartParam)
		}
	}
	if vals, ok := req.Form[rowEndParam]; ok && len(vals) > 0 {
		var err error
		rowEnd, err = strconv.Atoi(vals[len(vals)-1])
		if err != nil {
			return errors.Wrapf(err, `parsing %s`, rowEndParam)
		}
	}

	w.Header().Set(`Content-Type`, `text/csv`)
	_, err := WriteCSVRows(ctx, w, *table, rowStart, rowEnd, -1 /* sizeBytesLimit */)
	return err
}

// bytesWrittenWriter wraps an io.Writer and tracks the total number of bytes
// written through it.
type bytesWrittenWriter struct {
	w       io.Writer
	written int64
}

func (w *bytesWrittenWriter) Write(p []byte) (int, error) {
	n, err := w.w.Write(p)
	w.written += int64(n)
	return n, err
}
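
// Another illustrative sketch, not in the original file: draining the
// streaming reader returned by NewCSVRowsReader. readTableCSVExample is a
// hypothetical name; the zero-value batchEnd defaults to the end of the
// table, so this reads every initial-row batch.
func readTableCSVExample(t Table) (string, error) {
	r := NewCSVRowsReader(t, 0 /* batchStart */, 0 /* batchEnd: defaults to NumBatches */)
	var buf bytes.Buffer
	if _, err := io.Copy(&buf, r); err != nil {
		return "", err
	}
	return buf.String(), nil
}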

// CSVMux returns a mux over http handlers for csv data in all tables in the
// given generators.
func CSVMux(metas []Meta) *http.ServeMux {
	mux := http.NewServeMux()
	for _, meta := range metas {
		meta := meta // copy the loop variable for capture by the closure below
		prefix := fmt.Sprintf(`/csv/%s/`, meta.Name)
		mux.HandleFunc(prefix, func(w http.ResponseWriter, req *http.Request) {
			if err := HandleCSV(w, req, prefix, meta); err != nil {
				http.Error(w, err.Error(), http.StatusInternalServerError)
			}
		})
	}
	return mux
}
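
// A final illustrative sketch, not in the original file: serving the mux
// returned by CSVMux, which makes each table reachable at
// /csv/<generator>/<table>, with optional ?row-start=N&row-end=M parameters
// handled by HandleCSV above. serveCSVExample and the address are assumptions.
func serveCSVExample(metas []Meta) error {
	return http.ListenAndServe(`:8081` /* assumed address */, CSVMux(metas))
}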