github.com/matrixorigin/matrixone@v1.2.0/pkg/util/export/etl/csv.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package etl
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"encoding/csv"
    21  	"io"
    22  	"sync"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    25  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    26  	"github.com/matrixorigin/matrixone/pkg/common/util"
    27  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    28  	"github.com/matrixorigin/matrixone/pkg/util/export/table"
    29  )
    30  
    31  const initedSize = 4 * mpool.MB
    32  
    33  var bufPool = sync.Pool{New: func() any {
    34  	return bytes.NewBuffer(make([]byte, 0, initedSize))
    35  }}
    36  
    37  func getBuffer() *bytes.Buffer {
    38  	return bufPool.Get().(*bytes.Buffer)
    39  }
    40  
    41  func putBuffer(buf *bytes.Buffer) {
    42  	if buf != nil {
    43  		buf.Reset()
    44  		bufPool.Put(buf)
    45  	}
    46  }
    47  
    48  var _ table.RowWriter = (*CSVWriter)(nil)
    49  
    50  type CSVWriter struct {
    51  	ctx    context.Context
    52  	writer io.StringWriter
    53  
    54  	buf       *bytes.Buffer
    55  	formatter *csv.Writer
    56  }
    57  
    58  func NewCSVWriter(ctx context.Context, writer io.StringWriter) *CSVWriter {
    59  	w := &CSVWriter{
    60  		ctx:       ctx,
    61  		writer:    writer,
    62  		buf:       nil,
    63  		formatter: nil,
    64  	}
    65  	return w
    66  }
    67  
    68  func (w *CSVWriter) initBuffer() {
    69  	if w.buf == nil {
    70  		w.buf = getBuffer()
    71  		w.formatter = csv.NewWriter(w.buf)
    72  	}
    73  }
    74  func (w *CSVWriter) releaseBuffer() {
    75  	if w.buf != nil {
    76  		w.formatter = nil
    77  		putBuffer(w.buf)
    78  	}
    79  }
    80  
    81  func (w *CSVWriter) WriteRow(row *table.Row) error {
    82  	return w.WriteStrings(row.ToStrings())
    83  }
    84  
    85  func (w *CSVWriter) WriteStrings(record []string) error {
    86  	w.initBuffer()
    87  	defer w.formatter.Flush()
    88  	err := w.formatter.Write(record)
    89  	if err != nil {
    90  		return moerr.ConvertGoError(w.ctx, err)
    91  	}
    92  	return nil
    93  }
    94  
    95  func (w *CSVWriter) GetContent() string {
    96  	return w.buf.String()
    97  }
    98  
    99  func (w *CSVWriter) FlushAndClose() (int, error) {
   100  	defer w.releaseBuffer()
   101  	if w.buf == nil || w.buf.Len() == 0 {
   102  		return 0, nil
   103  	}
   104  	n, err := w.writer.WriteString(util.UnsafeBytesToString(w.buf.Bytes()))
   105  	if err != nil {
   106  		return 0, err
   107  	}
   108  	w.writer = nil
   109  	w.buf = nil
   110  	return n, nil
   111  }
   112  
   113  type FSWriter struct {
   114  	ctx context.Context         // New args
   115  	fs  fileservice.FileService // New args
   116  	// filepath
   117  	filepath string // see WithFilePath or auto generated by NewFSWriter
   118  
   119  	mux sync.Mutex
   120  
   121  	offset int // see Write, should not have size bigger than 2GB
   122  }
   123  
   124  type FSWriterOption func(*FSWriter)
   125  
   126  func (f FSWriterOption) Apply(w *FSWriter) {
   127  	f(w)
   128  }
   129  
   130  func NewFSWriter(ctx context.Context, fs fileservice.FileService, opts ...FSWriterOption) *FSWriter {
   131  	w := &FSWriter{
   132  		ctx: ctx,
   133  		fs:  fs,
   134  	}
   135  	for _, o := range opts {
   136  		o.Apply(w)
   137  	}
   138  	if len(w.filepath) == 0 {
   139  		panic("filepath is Empty")
   140  	}
   141  	return w
   142  }
   143  
   144  func WithFilePath(filepath string) FSWriterOption {
   145  	return FSWriterOption(func(w *FSWriter) {
   146  		w.filepath = filepath
   147  	})
   148  }
   149  
   150  // Write implement io.Writer, Please execute in series
   151  func (w *FSWriter) Write(p []byte) (n int, err error) {
   152  	w.mux.Lock()
   153  	defer w.mux.Unlock()
   154  	n = len(p)
   155  	mkdirTried := false
   156  mkdirRetry:
   157  	if err = w.fs.Write(w.ctx, fileservice.IOVector{
   158  		// like: etl:store/system/filename.csv
   159  		FilePath: w.filepath,
   160  		Entries: []fileservice.IOEntry{
   161  			{
   162  				Offset: int64(w.offset),
   163  				Size:   int64(n),
   164  				Data:   p,
   165  			},
   166  		},
   167  	}); err == nil {
   168  		w.offset += n
   169  	} else if moerr.IsMoErrCode(err, moerr.ErrFileAlreadyExists) && !mkdirTried {
   170  		mkdirTried = true
   171  		goto mkdirRetry
   172  	}
   173  	// XXX Why call this?
   174  	// _ = errors.WithContext(w.ctx, err)
   175  	return
   176  }
   177  
   178  // WriteString implement io.StringWriter
   179  func (w *FSWriter) WriteString(s string) (n int, err error) {
   180  	var b = util.UnsafeStringToBytes(s)
   181  	return w.Write(b)
   182  }