github.com/matrixorigin/matrixone@v1.2.0/pkg/util/export/etl/csv.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package etl 16 17 import ( 18 "bytes" 19 "context" 20 "encoding/csv" 21 "io" 22 "sync" 23 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/common/mpool" 26 "github.com/matrixorigin/matrixone/pkg/common/util" 27 "github.com/matrixorigin/matrixone/pkg/fileservice" 28 "github.com/matrixorigin/matrixone/pkg/util/export/table" 29 ) 30 31 const initedSize = 4 * mpool.MB 32 33 var bufPool = sync.Pool{New: func() any { 34 return bytes.NewBuffer(make([]byte, 0, initedSize)) 35 }} 36 37 func getBuffer() *bytes.Buffer { 38 return bufPool.Get().(*bytes.Buffer) 39 } 40 41 func putBuffer(buf *bytes.Buffer) { 42 if buf != nil { 43 buf.Reset() 44 bufPool.Put(buf) 45 } 46 } 47 48 var _ table.RowWriter = (*CSVWriter)(nil) 49 50 type CSVWriter struct { 51 ctx context.Context 52 writer io.StringWriter 53 54 buf *bytes.Buffer 55 formatter *csv.Writer 56 } 57 58 func NewCSVWriter(ctx context.Context, writer io.StringWriter) *CSVWriter { 59 w := &CSVWriter{ 60 ctx: ctx, 61 writer: writer, 62 buf: nil, 63 formatter: nil, 64 } 65 return w 66 } 67 68 func (w *CSVWriter) initBuffer() { 69 if w.buf == nil { 70 w.buf = getBuffer() 71 w.formatter = csv.NewWriter(w.buf) 72 } 73 } 74 func (w *CSVWriter) releaseBuffer() { 75 if w.buf != nil { 76 w.formatter = nil 77 putBuffer(w.buf) 78 } 79 } 80 81 func (w *CSVWriter) WriteRow(row *table.Row) error { 82 return w.WriteStrings(row.ToStrings()) 83 } 84 85 func (w *CSVWriter) WriteStrings(record []string) error { 86 w.initBuffer() 87 defer w.formatter.Flush() 88 err := w.formatter.Write(record) 89 if err != nil { 90 return moerr.ConvertGoError(w.ctx, err) 91 } 92 return nil 93 } 94 95 func (w *CSVWriter) GetContent() string { 96 return w.buf.String() 97 } 98 99 func (w *CSVWriter) FlushAndClose() (int, error) { 100 defer w.releaseBuffer() 101 if w.buf == nil || w.buf.Len() == 0 { 102 return 0, nil 103 } 104 n, err := w.writer.WriteString(util.UnsafeBytesToString(w.buf.Bytes())) 105 if err != nil { 106 return 0, err 107 } 108 w.writer = nil 109 w.buf = nil 110 return n, nil 111 } 112 113 type FSWriter struct { 114 ctx context.Context // New args 115 fs fileservice.FileService // New args 116 // filepath 117 filepath string // see WithFilePath or auto generated by NewFSWriter 118 119 mux sync.Mutex 120 121 offset int // see Write, should not have size bigger than 2GB 122 } 123 124 type FSWriterOption func(*FSWriter) 125 126 func (f FSWriterOption) Apply(w *FSWriter) { 127 f(w) 128 } 129 130 func NewFSWriter(ctx context.Context, fs fileservice.FileService, opts ...FSWriterOption) *FSWriter { 131 w := &FSWriter{ 132 ctx: ctx, 133 fs: fs, 134 } 135 for _, o := range opts { 136 o.Apply(w) 137 } 138 if len(w.filepath) == 0 { 139 panic("filepath is Empty") 140 } 141 return w 142 } 143 144 func WithFilePath(filepath string) FSWriterOption { 145 return FSWriterOption(func(w *FSWriter) { 146 w.filepath = filepath 147 }) 148 } 149 150 // Write implement io.Writer, Please execute in series 151 func (w *FSWriter) Write(p []byte) (n int, err error) { 152 w.mux.Lock() 153 defer w.mux.Unlock() 154 n = len(p) 155 mkdirTried := false 156 mkdirRetry: 157 if err = w.fs.Write(w.ctx, fileservice.IOVector{ 158 // like: etl:store/system/filename.csv 159 FilePath: w.filepath, 160 Entries: []fileservice.IOEntry{ 161 { 162 Offset: int64(w.offset), 163 Size: int64(n), 164 Data: p, 165 }, 166 }, 167 }); err == nil { 168 w.offset += n 169 } else if moerr.IsMoErrCode(err, moerr.ErrFileAlreadyExists) && !mkdirTried { 170 mkdirTried = true 171 goto mkdirRetry 172 } 173 // XXX Why call this? 174 // _ = errors.WithContext(w.ctx, err) 175 return 176 } 177 178 // WriteString implement io.StringWriter 179 func (w *FSWriter) WriteString(s string) (n int, err error) { 180 var b = util.UnsafeStringToBytes(s) 181 return w.Write(b) 182 }