github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/exportcsv.go

// Copyright 2018 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package importccl

import (
	"bytes"
	"compress/gzip"
	"context"
	"fmt"
	"strings"

	"github.com/cockroachdb/cockroach/pkg/ccl/utilccl"
	"github.com/cockroachdb/cockroach/pkg/sql"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/rowexec"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
	"github.com/cockroachdb/cockroach/pkg/util/encoding/csv"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
	"github.com/cockroachdb/errors"
)

const exportFilePatternPart = "%part%"
const exportFilePatternDefault = exportFilePatternPart + ".csv"

// csvExporter wraps the csv writer and an optional gzip compressor,
// encapsulating the internals so that consumers can export without
// worrying about compression details.
type csvExporter struct {
	compressor *gzip.Writer
	buf        *bytes.Buffer
	csvWriter  *csv.Writer
}

// Write appends a record to the csv file.
func (c *csvExporter) Write(record []string) error {
	return c.csvWriter.Write(record)
}

// Close closes the compressor writer, which
// appends the archive footer.
func (c *csvExporter) Close() error {
	if c.compressor != nil {
		return c.compressor.Close()
	}
	return nil
}

// Flush flushes the csv writer, and the compressor writer if it is
// initialized.
func (c *csvExporter) Flush() error {
	c.csvWriter.Flush()
	if c.compressor != nil {
		return c.compressor.Flush()
	}
	return nil
}

// ResetBuffer resets the buffer and compressor state.
func (c *csvExporter) ResetBuffer() {
	c.buf.Reset()
	if c.compressor != nil {
		// Brings compressor to its initial state
		c.compressor.Reset(c.buf)
	}
}

// Bytes returns the buffered bytes, compressed if a compressor is configured.
func (c *csvExporter) Bytes() []byte {
	return c.buf.Bytes()
}

// Len returns the length of the buffered content.
func (c *csvExporter) Len() int {
	return c.buf.Len()
}

func (c *csvExporter) FileName(spec execinfrapb.CSVWriterSpec, part string) string {
	pattern := exportFilePatternDefault
	if spec.NamePattern != "" {
		pattern = spec.NamePattern
	}

	fileName := strings.Replace(pattern, exportFilePatternPart, part, -1)
	// TODO: add suffix based on compressor type
	if c.compressor != nil {
		fileName += ".gz"
	}
	return fileName
}

func newCSVExporter(sp execinfrapb.CSVWriterSpec) *csvExporter {
	buf := bytes.NewBuffer([]byte{})
	var exporter *csvExporter
	switch sp.CompressionCodec {
	case execinfrapb.FileCompression_Gzip:
		{
			writer := gzip.NewWriter(buf)
			exporter = &csvExporter{
				compressor: writer,
				buf:        buf,
				csvWriter:  csv.NewWriter(writer),
			}
		}
	default:
		{
			exporter = &csvExporter{
				buf:       buf,
				csvWriter: csv.NewWriter(buf),
			}
		}
	}
	if sp.Options.Comma != 0 {
		exporter.csvWriter.Comma = sp.Options.Comma
	}
	return exporter
}

func newCSVWriterProcessor(
	flowCtx *execinfra.FlowCtx,
	processorID int32,
	spec execinfrapb.CSVWriterSpec,
	input execinfra.RowSource,
	output execinfra.RowReceiver,
) (execinfra.Processor, error) {

	if err := utilccl.CheckEnterpriseEnabled(
		flowCtx.Cfg.Settings,
		flowCtx.Cfg.ClusterID.Get(),
		sql.ClusterOrganization.Get(&flowCtx.Cfg.Settings.SV),
		"EXPORT",
	); err != nil {
		return nil, err
	}

	c := &csvWriter{
		flowCtx:     flowCtx,
		processorID: processorID,
		spec:        spec,
		input:       input,
		output:      output,
	}
	if err := c.out.Init(&execinfrapb.PostProcessSpec{}, c.OutputTypes(), flowCtx.NewEvalCtx(), output); err != nil {
		return nil, err
	}
	return c, nil
}

type csvWriter struct {
	flowCtx     *execinfra.FlowCtx
	processorID int32
	spec        execinfrapb.CSVWriterSpec
	input       execinfra.RowSource
	out         execinfra.ProcOutputHelper
	output      execinfra.RowReceiver
}

var _ execinfra.Processor = &csvWriter{}

func (sp *csvWriter) OutputTypes() []*types.T {
	res := make([]*types.T, len(sqlbase.ExportColumns))
	for i := range res {
		res[i] = sqlbase.ExportColumns[i].Typ
	}
	return res
}

func (sp *csvWriter) Run(ctx context.Context) {
	ctx, span := tracing.ChildSpan(ctx, "csvWriter")
	defer tracing.FinishSpan(span)

	err := func() error {
		typs := sp.input.OutputTypes()
		sp.input.Start(ctx)
		input := execinfra.MakeNoMetadataRowSource(sp.input, sp.output)

		alloc := &sqlbase.DatumAlloc{}

		writer := newCSVExporter(sp.spec)

		nullsAs := ""
		if sp.spec.Options.NullEncoding != nil {
			nullsAs = *sp.spec.Options.NullEncoding
		}
		f := tree.NewFmtCtx(tree.FmtExport)
		defer f.Close()

		csvRow := make([]string, len(typs))

		chunk := 0
		done := false
		for {
			var rows int64
			writer.ResetBuffer()
			for {
				if sp.spec.ChunkRows > 0 && rows >= sp.spec.ChunkRows {
					break
				}
				row, err := input.NextRow()
				if err != nil {
					return err
				}
				if row == nil {
					done = true
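					// Input exhausted: mark done and leave the inner loop so
					// that any rows buffered in this final (possibly partial)
					// chunk are still flushed and uploaded; the outer loop
					// exits via the `done` check below.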
					break
				}
				rows++

				for i, ed := range row {
					if ed.IsNull() {
						csvRow[i] = nullsAs
						continue
					}
					if err := ed.EnsureDecoded(typs[i], alloc); err != nil {
						return err
					}
					ed.Datum.Format(f)
					csvRow[i] = f.String()
					f.Reset()
				}
				if err := writer.Write(csvRow); err != nil {
					return err
				}
			}
			if rows < 1 {
				break
			}
			if err := writer.Flush(); err != nil {
				return errors.Wrap(err, "failed to flush csv writer")
			}

			conf, err := cloud.ExternalStorageConfFromURI(sp.spec.Destination)
			if err != nil {
				return err
			}
			es, err := sp.flowCtx.Cfg.ExternalStorage(ctx, conf)
			if err != nil {
				return err
			}
			defer es.Close()

			nodeID, err := sp.flowCtx.EvalCtx.NodeID.OptionalNodeIDErr(47970)
			if err != nil {
				return err
			}

			part := fmt.Sprintf("n%d.%d", nodeID, chunk)
			chunk++
			filename := writer.FileName(sp.spec, part)
			// Close writer to ensure buffer and any compression footer is flushed.
			err = writer.Close()
			if err != nil {
				return errors.Wrapf(err, "failed to close exporting writer")
			}

			size := writer.Len()

			if err := es.WriteFile(ctx, filename, bytes.NewReader(writer.Bytes())); err != nil {
				return err
			}
			res := sqlbase.EncDatumRow{
				sqlbase.DatumToEncDatum(
					types.String,
					tree.NewDString(filename),
				),
				sqlbase.DatumToEncDatum(
					types.Int,
					tree.NewDInt(tree.DInt(rows)),
				),
				sqlbase.DatumToEncDatum(
					types.Int,
					tree.NewDInt(tree.DInt(size)),
				),
			}

			cs, err := sp.out.EmitRow(ctx, res)
			if err != nil {
				return err
			}
			if cs != execinfra.NeedMoreRows {
				// TODO(dt): presumably this is because our recv already closed due to
				// another error... so do we really need another one?
				return errors.New("unexpected closure of consumer")
			}
			if done {
				break
			}
		}

		return nil
	}()

	// TODO(dt): pick up tracing info in trailing meta
	execinfra.DrainAndClose(
		ctx, sp.output, err, func(context.Context) {} /* pushTrailingMeta */, sp.input)
}

func init() {
	rowexec.NewCSVWriterProcessor = newCSVWriterProcessor
}
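// The sketch below is not part of the original file; it is a minimal
// illustration of the csvExporter lifecycle that csvWriter.Run drives for
// each chunk: write rows, Flush, Close (which emits the gzip footer), then
// read the buffered bytes and derive a file name. The function name and the
// literal row values and part string are hypothetical.
func exampleCSVExporterLifecycle() ([]byte, string, error) {
	// Request gzip compression; the default branch of newCSVExporter instead
	// writes uncompressed CSV straight into the buffer.
	spec := execinfrapb.CSVWriterSpec{
		CompressionCodec: execinfrapb.FileCompression_Gzip,
	}
	exporter := newCSVExporter(spec)

	if err := exporter.Write([]string{"1", "alice"}); err != nil {
		return nil, "", err
	}
	if err := exporter.Write([]string{"2", "bob"}); err != nil {
		return nil, "", err
	}

	// Flush pushes buffered csv data through the compressor; Close is still
	// required afterwards so the gzip footer lands in the buffer.
	if err := exporter.Flush(); err != nil {
		return nil, "", err
	}
	if err := exporter.Close(); err != nil {
		return nil, "", err
	}

	// With the default pattern "%part%.csv" and gzip enabled this yields
	// "n1.0.csv.gz"; Bytes/Len expose the payload that Run hands to
	// ExternalStorage.WriteFile.
	filename := exporter.FileName(spec, "n1.0")
	return exporter.Bytes(), filename, nil
}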