github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/exportcsv.go

// Copyright 2018 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package importccl

import (
	"bytes"
	"compress/gzip"
	"context"
	"fmt"
	"strings"

	"github.com/cockroachdb/cockroach/pkg/ccl/utilccl"
	"github.com/cockroachdb/cockroach/pkg/sql"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/rowexec"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
	"github.com/cockroachdb/cockroach/pkg/util/encoding/csv"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
	"github.com/cockroachdb/errors"
)

const exportFilePatternPart = "%part%"
const exportFilePatternDefault = exportFilePatternPart + ".csv"

// csvExporter wraps the CSV writer and an optional gzip compressor,
// encapsulating the internals so that consumers can export data without
// caring whether the output is compressed.
type csvExporter struct {
	compressor *gzip.Writer
	buf        *bytes.Buffer
	csvWriter  *csv.Writer
}

// Write appends a record to the CSV output.
func (c *csvExporter) Write(record []string) error {
	return c.csvWriter.Write(record)
}

// Close closes the compressor, which writes the gzip stream footer. It must
// be called before reading Bytes when compression is enabled.
func (c *csvExporter) Close() error {
	if c.compressor != nil {
		return c.compressor.Close()
	}
	return nil
}

// Flush flushes the CSV writer and, if initialized, the compressor.
func (c *csvExporter) Flush() error {
	c.csvWriter.Flush()
	if c.compressor != nil {
		return c.compressor.Flush()
	}
	return nil
}

// ResetBuffer resets the buffer and compressor state.
func (c *csvExporter) ResetBuffer() {
	c.buf.Reset()
	if c.compressor != nil {
		// Bring the compressor back to its initial state so it can write a
		// fresh gzip stream into the reset buffer.
		c.compressor.Reset(c.buf)
	}
}

// Bytes returns the buffered output, compressed if a compressor is in use.
func (c *csvExporter) Bytes() []byte {
	return c.buf.Bytes()
}

// Len returns the length of the buffered output.
func (c *csvExporter) Len() int {
	return c.buf.Len()
}

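// FileName computes the name of the file a chunk is written to by
// substituting the per-chunk token for every occurrence of "%part%" in the
// user-supplied pattern (or the default pattern). For example, part "n1.0"
// with the default pattern yields "n1.0.csv", or "n1.0.csv.gz" under gzip.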
func (c *csvExporter) FileName(spec execinfrapb.CSVWriterSpec, part string) string {
	pattern := exportFilePatternDefault
	if spec.NamePattern != "" {
		pattern = spec.NamePattern
	}

	fileName := strings.Replace(pattern, exportFilePatternPart, part, -1)
	// TODO: add suffix based on compressor type
	if c.compressor != nil {
		fileName += ".gz"
	}
	return fileName
}

func newCSVExporter(sp execinfrapb.CSVWriterSpec) *csvExporter {
	buf := bytes.NewBuffer([]byte{})
	var exporter *csvExporter
	switch sp.CompressionCodec {
	case execinfrapb.FileCompression_Gzip:
		// Layer the writers so that rows pass through the CSV encoder, then
		// the compressor, and land in the buffer.
		writer := gzip.NewWriter(buf)
		exporter = &csvExporter{
			compressor: writer,
			buf:        buf,
			csvWriter:  csv.NewWriter(writer),
		}
	default:
		exporter = &csvExporter{
			buf:       buf,
			csvWriter: csv.NewWriter(buf),
		}
	}
	if sp.Options.Comma != 0 {
		exporter.csvWriter.Comma = sp.Options.Comma
	}
	return exporter
}
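
// A rough sketch of how a csvExporter is driven, mirroring the Run loop
// below (illustrative only; the variable names are hypothetical, not
// package API):
//
//	exp := newCSVExporter(spec)
//	exp.ResetBuffer()                 // begin a fresh (possibly gzip) stream
//	_ = exp.Write([]string{"a", "b"}) // rows accumulate in the buffer
//	_ = exp.Flush()                   // flush the CSV (and compressor) state
//	_ = exp.Close()                   // gzip only: append the stream footer
//	payload := exp.Bytes()            // chunk bytes, ready for upload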

func newCSVWriterProcessor(
	flowCtx *execinfra.FlowCtx,
	processorID int32,
	spec execinfrapb.CSVWriterSpec,
	input execinfra.RowSource,
	output execinfra.RowReceiver,
) (execinfra.Processor, error) {
	// EXPORT is an enterprise feature: refuse to construct the processor
	// without a valid license.
	if err := utilccl.CheckEnterpriseEnabled(
		flowCtx.Cfg.Settings,
		flowCtx.Cfg.ClusterID.Get(),
		sql.ClusterOrganization.Get(&flowCtx.Cfg.Settings.SV),
		"EXPORT",
	); err != nil {
		return nil, err
	}

	c := &csvWriter{
		flowCtx:     flowCtx,
		processorID: processorID,
		spec:        spec,
		input:       input,
		output:      output,
	}
	if err := c.out.Init(&execinfrapb.PostProcessSpec{}, c.OutputTypes(), flowCtx.NewEvalCtx(), output); err != nil {
		return nil, err
	}
	return c, nil
}

// csvWriter is the processor that drains its input, encodes rows as CSV,
// writes chunked files to external storage, and emits one summary row per
// file written.
type csvWriter struct {
	flowCtx     *execinfra.FlowCtx
	processorID int32
	spec        execinfrapb.CSVWriterSpec
	input       execinfra.RowSource
	out         execinfra.ProcOutputHelper
	output      execinfra.RowReceiver
}

var _ execinfra.Processor = &csvWriter{}

// OutputTypes returns the schema of the summary rows this processor emits,
// derived from sqlbase.ExportColumns.
func (sp *csvWriter) OutputTypes() []*types.T {
	res := make([]*types.T, len(sqlbase.ExportColumns))
	for i := range res {
		res[i] = sqlbase.ExportColumns[i].Typ
	}
	return res
}

func (sp *csvWriter) Run(ctx context.Context) {
	ctx, span := tracing.ChildSpan(ctx, "csvWriter")
	defer tracing.FinishSpan(span)

	err := func() error {
		typs := sp.input.OutputTypes()
		sp.input.Start(ctx)
		input := execinfra.MakeNoMetadataRowSource(sp.input, sp.output)

		alloc := &sqlbase.DatumAlloc{}

		writer := newCSVExporter(sp.spec)

		nullsAs := ""
		if sp.spec.Options.NullEncoding != nil {
			nullsAs = *sp.spec.Options.NullEncoding
		}
		f := tree.NewFmtCtx(tree.FmtExport)
		defer f.Close()

		// csvRow is reused for every input row to avoid per-row allocations.
		csvRow := make([]string, len(typs))

		chunk := 0
		done := false
		for {
			var rows int64
			writer.ResetBuffer()
			for {
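				// Accumulate rows into the current chunk until ChunkRows rows
				// have been written (ChunkRows <= 0 disables chunking) or the
				// input is exhausted; the completed chunk is then uploaded as
				// a single file below.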
				if sp.spec.ChunkRows > 0 && rows >= sp.spec.ChunkRows {
					break
				}
				row, err := input.NextRow()
				if err != nil {
					return err
				}
				if row == nil {
					done = true
					break
				}
				rows++

				for i, ed := range row {
					if ed.IsNull() {
						csvRow[i] = nullsAs
						continue
					}
					if err := ed.EnsureDecoded(typs[i], alloc); err != nil {
						return err
					}
					ed.Datum.Format(f)
					csvRow[i] = f.String()
					f.Reset()
				}
				if err := writer.Write(csvRow); err != nil {
					return err
				}
			}
			if rows < 1 {
				break
			}
			if err := writer.Flush(); err != nil {
				return errors.Wrap(err, "failed to flush csv writer")
			}

			conf, err := cloud.ExternalStorageConfFromURI(sp.spec.Destination)
			if err != nil {
				return err
			}
			es, err := sp.flowCtx.Cfg.ExternalStorage(ctx, conf)
			if err != nil {
				return err
			}
			defer es.Close()
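			// Note that this defer runs only when the surrounding closure
			// returns, so one storage handle per chunk stays open until the
			// export finishes.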

			nodeID, err := sp.flowCtx.EvalCtx.NodeID.OptionalNodeIDErr(47970)
			if err != nil {
				return err
			}

			part := fmt.Sprintf("n%d.%d", nodeID, chunk)
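			// e.g. node 1, chunk 0 yields part "n1.0" and, with the default
			// pattern, the file name "n1.0.csv" (plus ".gz" when compressed).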
			chunk++
			filename := writer.FileName(sp.spec, part)
			// Close the writer to ensure the buffer and any compression
			// footer are flushed before the chunk's size is read.
			err = writer.Close()
			if err != nil {
				return errors.Wrap(err, "failed to close exporting writer")
			}

			size := writer.Len()

			if err := es.WriteFile(ctx, filename, bytes.NewReader(writer.Bytes())); err != nil {
				return err
			}
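			// Report the completed file back to the consumer: its name, the
			// number of rows it holds, and its size in bytes. These columns
			// match sqlbase.ExportColumns (see OutputTypes above).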
			res := sqlbase.EncDatumRow{
				sqlbase.DatumToEncDatum(
					types.String,
					tree.NewDString(filename),
				),
				sqlbase.DatumToEncDatum(
					types.Int,
					tree.NewDInt(tree.DInt(rows)),
				),
				sqlbase.DatumToEncDatum(
					types.Int,
					tree.NewDInt(tree.DInt(size)),
				),
			}

			cs, err := sp.out.EmitRow(ctx, res)
			if err != nil {
				return err
			}
			if cs != execinfra.NeedMoreRows {
				// TODO(dt): presumably this is because our recv already closed due to
				// another error... so do we really need another one?
				return errors.New("unexpected closure of consumer")
			}
			if done {
				break
			}
		}

		return nil
	}()

	// TODO(dt): pick up tracing info in trailing meta
	execinfra.DrainAndClose(
		ctx, sp.output, err, func(context.Context) {} /* pushTrailingMeta */, sp.input)
}

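// rowexec declares NewCSVWriterProcessor as an injectable hook; registering
// the CCL implementation at init time is the usual CockroachDB pattern for
// keeping enterprise-licensed code out of the core packages.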
func init() {
	rowexec.NewCSVWriterProcessor = newCSVWriterProcessor
}
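
// For orientation, a statement along these lines is what ultimately drives
// this processor (illustrative sketch; the option names and the nodelocal
// URI form are assumptions about this version, not taken from this file):
//
//	EXPORT INTO CSV 'nodelocal://1/export'
//	    WITH delimiter = '|', nullas = '', chunk_rows = '1000'
//	    FROM SELECT * FROM defaultdb.public.users;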