github.com/matrixorigin/matrixone@v0.7.0/pkg/frontend/export.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package frontend 16 17 import ( 18 "bufio" 19 "bytes" 20 "context" 21 "fmt" 22 "github.com/matrixorigin/matrixone/pkg/fileservice" 23 "golang.org/x/sync/errgroup" 24 "io" 25 "os" 26 "strconv" 27 "sync" 28 29 "github.com/matrixorigin/matrixone/pkg/common/moerr" 30 "github.com/matrixorigin/matrixone/pkg/container/bytejson" 31 32 "github.com/matrixorigin/matrixone/pkg/container/types" 33 "github.com/matrixorigin/matrixone/pkg/defines" 34 "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" 35 ) 36 37 type ExportParam struct { 38 *tree.ExportParam 39 // file handler 40 File *os.File 41 // bufio.writer 42 Writer *bufio.Writer 43 // curFileSize 44 CurFileSize uint64 45 Rows uint64 46 FileCnt uint 47 ColumnFlag []bool 48 Symbol [][]byte 49 // default flush size 50 DefaultBufSize int64 51 OutputStr []byte 52 LineSize uint64 53 54 //file service & buffer for the line 55 UseFileService bool 56 FileService fileservice.FileService 57 LineBuffer *bytes.Buffer 58 Ctx context.Context 59 AsyncReader *io.PipeReader 60 AsyncWriter *io.PipeWriter 61 AsyncGroup *errgroup.Group 62 } 63 64 var OpenFile = os.OpenFile 65 var escape byte = '"' 66 67 type CloseExportData struct { 68 stopExportData chan interface{} 69 onceClose sync.Once 70 } 71 72 func NewCloseExportData() *CloseExportData { 73 return &CloseExportData{ 74 stopExportData: make(chan interface{}), 75 } 76 } 77 78 func (cld *CloseExportData) Open() { 79 } 80 81 func (cld *CloseExportData) Close() { 82 cld.onceClose.Do(func() { 83 close(cld.stopExportData) 84 }) 85 } 86 87 func initExportFileParam(ep *ExportParam, mrs *MysqlResultSet) { 88 ep.DefaultBufSize *= 1024 * 1024 89 n := (int)(mrs.GetColumnCount()) 90 if n <= 0 { 91 return 92 } 93 ep.Symbol = make([][]byte, n) 94 for i := 0; i < n-1; i++ { 95 ep.Symbol[i] = []byte(ep.Fields.Terminated) 96 } 97 ep.Symbol[n-1] = []byte(ep.Lines.TerminatedBy) 98 ep.ColumnFlag = make([]bool, len(mrs.Name2Index)) 99 for i := 0; i < len(ep.ForceQuote); i++ { 100 col, ok := mrs.Name2Index[ep.ForceQuote[i]] 101 if ok { 102 ep.ColumnFlag[col] = true 103 } 104 } 105 } 106 107 var openNewFile = func(ctx context.Context, ep *ExportParam, mrs *MysqlResultSet) error { 108 lineSize := ep.LineSize 109 var err error 110 ep.CurFileSize = 0 111 if !ep.UseFileService { 112 filePath := getExportFilePath(ep.FilePath, ep.FileCnt) 113 ep.File, err = OpenFile(filePath, os.O_RDWR|os.O_EXCL|os.O_CREATE, 0o666) 114 if err != nil { 115 return err 116 } 117 ep.Writer = bufio.NewWriterSize(ep.File, int(ep.DefaultBufSize)) 118 } else { 119 //default 1MB 120 if ep.LineBuffer == nil { 121 ep.LineBuffer = &bytes.Buffer{} 122 } else { 123 ep.LineBuffer.Reset() 124 } 125 ep.AsyncReader, ep.AsyncWriter = io.Pipe() 126 filePath := getExportFilePath(ep.FilePath, ep.FileCnt) 127 128 asyncWriteFunc := func() error { 129 vec := fileservice.IOVector{ 130 FilePath: filePath, 131 Entries: []fileservice.IOEntry{ 132 { 133 ReaderForWrite: ep.AsyncReader, 134 Size: -1, 135 }, 136 }, 137 } 138 err := ep.FileService.Write(ctx, vec) 139 if err != nil { 140 err2 := ep.AsyncReader.CloseWithError(err) 141 if err2 != nil { 142 return err2 143 } 144 } 145 return err 146 } 147 148 ep.AsyncGroup, _ = errgroup.WithContext(ctx) 149 ep.AsyncGroup.Go(asyncWriteFunc) 150 } 151 if ep.Header { 152 var header string 153 n := len(mrs.Columns) 154 if n == 0 { 155 return nil 156 } 157 for i := 0; i < n-1; i++ { 158 header += mrs.Columns[i].Name() + ep.Fields.Terminated 159 } 160 header += mrs.Columns[n-1].Name() + ep.Lines.TerminatedBy 161 if ep.MaxFileSize != 0 && uint64(len(header)) >= ep.MaxFileSize { 162 return moerr.NewInternalError(ctx, "the header line size is over the maxFileSize") 163 } 164 if err := writeDataToCSVFile(ep, []byte(header)); err != nil { 165 return err 166 } 167 if _, err := EndOfLine(ep); err != nil { 168 return err 169 } 170 } 171 if lineSize != 0 { 172 ep.LineSize = 0 173 ep.Rows = 0 174 if err := writeDataToCSVFile(ep, ep.OutputStr); err != nil { 175 return err 176 } 177 } 178 return nil 179 } 180 181 func getExportFilePath(filename string, fileCnt uint) string { 182 if fileCnt == 0 { 183 return filename 184 } else { 185 return fmt.Sprintf("%s.%d", filename, fileCnt) 186 } 187 } 188 189 var formatOutputString = func(oq *outputQueue, tmp, symbol []byte, enclosed byte, flag bool) error { 190 var err error 191 if flag { 192 if err = writeToCSVFile(oq, []byte{enclosed}); err != nil { 193 return err 194 } 195 } 196 if err = writeToCSVFile(oq, tmp); err != nil { 197 return err 198 } 199 if flag { 200 if err = writeToCSVFile(oq, []byte{enclosed}); err != nil { 201 return err 202 } 203 } 204 if err = writeToCSVFile(oq, symbol); err != nil { 205 return err 206 } 207 return nil 208 } 209 210 var Flush = func(ep *ExportParam) error { 211 if !ep.UseFileService { 212 return ep.Writer.Flush() 213 } 214 return nil 215 } 216 217 var Seek = func(ep *ExportParam) (int64, error) { 218 if !ep.UseFileService { 219 return ep.File.Seek(int64(ep.CurFileSize-ep.LineSize), io.SeekStart) 220 } 221 return 0, nil 222 } 223 224 var Read = func(ep *ExportParam) (int, error) { 225 if !ep.UseFileService { 226 ep.OutputStr = make([]byte, ep.LineSize) 227 return ep.File.Read(ep.OutputStr) 228 } else { 229 ep.OutputStr = make([]byte, ep.LineSize) 230 copy(ep.OutputStr, ep.LineBuffer.Bytes()) 231 ep.LineBuffer.Reset() 232 return int(ep.LineSize), nil 233 } 234 } 235 236 var Truncate = func(ep *ExportParam) error { 237 if !ep.UseFileService { 238 return ep.File.Truncate(int64(ep.CurFileSize - ep.LineSize)) 239 } else { 240 return nil 241 } 242 } 243 244 var Close = func(ep *ExportParam) error { 245 if !ep.UseFileService { 246 ep.FileCnt++ 247 return ep.File.Close() 248 } else { 249 ep.FileCnt++ 250 err := ep.AsyncWriter.Close() 251 if err != nil { 252 return err 253 } 254 err = ep.AsyncGroup.Wait() 255 if err != nil { 256 return err 257 } 258 err = ep.AsyncReader.Close() 259 if err != nil { 260 return err 261 } 262 ep.AsyncReader = nil 263 ep.AsyncWriter = nil 264 ep.AsyncGroup = nil 265 return err 266 } 267 } 268 269 var Write = func(ep *ExportParam, output []byte) (int, error) { 270 if !ep.UseFileService { 271 return ep.Writer.Write(output) 272 } else { 273 return ep.LineBuffer.Write(output) 274 } 275 } 276 277 var EndOfLine = func(ep *ExportParam) (int, error) { 278 if ep.UseFileService { 279 n, err := ep.AsyncWriter.Write(ep.LineBuffer.Bytes()) 280 if err != nil { 281 err2 := ep.AsyncWriter.CloseWithError(err) 282 if err2 != nil { 283 return 0, err2 284 } 285 } 286 ep.LineBuffer.Reset() 287 return n, err 288 } 289 return 0, nil 290 } 291 292 func writeToCSVFile(oq *outputQueue, output []byte) error { 293 if oq.ep.MaxFileSize != 0 && oq.ep.CurFileSize+uint64(len(output)) > oq.ep.MaxFileSize { 294 if oq.ep.Rows == 0 { 295 return moerr.NewInternalError(oq.ctx, "the OneLine size is over the maxFileSize") 296 } 297 298 if err := Flush(oq.ep); err != nil { 299 return err 300 } 301 if oq.ep.LineSize != 0 { 302 if _, err := Seek(oq.ep); err != nil { 303 return err 304 } 305 for { 306 if n, err := Read(oq.ep); err != nil { 307 return err 308 } else if uint64(n) == oq.ep.LineSize { 309 break 310 } 311 } 312 if err := Truncate(oq.ep); err != nil { 313 return err 314 } 315 } 316 if err := Close(oq.ep); err != nil { 317 return err 318 } 319 if err := openNewFile(oq.ctx, oq.ep, oq.mrs); err != nil { 320 return err 321 } 322 } 323 324 if err := writeDataToCSVFile(oq.ep, output); err != nil { 325 return err 326 } 327 return nil 328 } 329 330 var writeDataToCSVFile = func(ep *ExportParam, output []byte) error { 331 for { 332 if n, err := Write(ep, output); err != nil { 333 return err 334 } else if n == len(output) { 335 break 336 } 337 } 338 ep.LineSize += uint64(len(output)) 339 ep.CurFileSize += uint64(len(output)) 340 return nil 341 } 342 343 func addEscapeToString(s []byte) []byte { 344 pos := make([]int, 0) 345 for i := 0; i < len(s); i++ { 346 if s[i] == escape { 347 pos = append(pos, i) 348 } 349 } 350 if len(pos) == 0 { 351 return s 352 } 353 ret := make([]byte, 0) 354 cur := 0 355 for i := 0; i < len(pos); i++ { 356 ret = append(ret, s[cur:pos[i]]...) 357 ret = append(ret, escape) 358 cur = pos[i] 359 } 360 ret = append(ret, s[cur:]...) 361 return ret 362 } 363 364 func exportDataToCSVFile(oq *outputQueue) error { 365 oq.ep.LineSize = 0 366 367 symbol := oq.ep.Symbol 368 closeby := oq.ep.Fields.EnclosedBy 369 flag := oq.ep.ColumnFlag 370 for i := uint64(0); i < oq.mrs.GetColumnCount(); i++ { 371 column, err := oq.mrs.GetColumn(oq.ctx, i) 372 if err != nil { 373 return err 374 } 375 mysqlColumn, ok := column.(*MysqlColumn) 376 if !ok { 377 return moerr.NewInternalError(oq.ctx, "sendColumn need MysqlColumn") 378 } 379 if isNil, err := oq.mrs.ColumnIsNull(oq.ctx, 0, i); err != nil { 380 return err 381 } else if isNil { 382 //NULL is output as \N 383 if err = formatOutputString(oq, []byte{'\\', 'N'}, symbol[i], closeby, false); err != nil { 384 return err 385 } 386 continue 387 } 388 389 switch mysqlColumn.ColumnType() { 390 case defines.MYSQL_TYPE_DECIMAL: 391 value, err := oq.mrs.GetString(oq.ctx, 0, i) 392 if err != nil { 393 return err 394 } 395 if err = formatOutputString(oq, []byte(value), symbol[i], closeby, flag[i]); err != nil { 396 return err 397 } 398 case defines.MYSQL_TYPE_BOOL: 399 value, err := oq.mrs.GetString(oq.ctx, 0, i) 400 if err != nil { 401 return err 402 } 403 if err = formatOutputString(oq, []byte(value), symbol[i], closeby, flag[i]); err != nil { 404 return err 405 } 406 case defines.MYSQL_TYPE_TINY, defines.MYSQL_TYPE_SHORT, defines.MYSQL_TYPE_INT24, defines.MYSQL_TYPE_LONG, defines.MYSQL_TYPE_YEAR: 407 value, err := oq.mrs.GetInt64(oq.ctx, 0, i) 408 if err != nil { 409 return err 410 } 411 if mysqlColumn.ColumnType() == defines.MYSQL_TYPE_YEAR { 412 if value == 0 { 413 if err = formatOutputString(oq, []byte("0000"), symbol[i], closeby, flag[i]); err != nil { 414 return err 415 } 416 } else { 417 oq.resetLineStr() 418 oq.lineStr = strconv.AppendInt(oq.lineStr, value, 10) 419 if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil { 420 return err 421 } 422 } 423 } else { 424 oq.resetLineStr() 425 oq.lineStr = strconv.AppendInt(oq.lineStr, value, 10) 426 if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil { 427 return err 428 } 429 } 430 case defines.MYSQL_TYPE_FLOAT, defines.MYSQL_TYPE_DOUBLE: 431 value, err := oq.mrs.GetFloat64(oq.ctx, 0, i) 432 if err != nil { 433 return err 434 } 435 oq.lineStr = []byte(fmt.Sprintf("%v", value)) 436 if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil { 437 return err 438 } 439 case defines.MYSQL_TYPE_LONGLONG: 440 if uint32(mysqlColumn.Flag())&defines.UNSIGNED_FLAG != 0 { 441 if value, err := oq.mrs.GetUint64(oq.ctx, 0, i); err != nil { 442 return err 443 } else { 444 oq.resetLineStr() 445 oq.lineStr = strconv.AppendUint(oq.lineStr, value, 10) 446 if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil { 447 return err 448 } 449 } 450 } else { 451 if value, err := oq.mrs.GetInt64(oq.ctx, 0, i); err != nil { 452 return err 453 } else { 454 oq.resetLineStr() 455 oq.lineStr = strconv.AppendInt(oq.lineStr, value, 10) 456 if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil { 457 return err 458 } 459 } 460 } 461 case defines.MYSQL_TYPE_VARCHAR, defines.MYSQL_TYPE_VAR_STRING, defines.MYSQL_TYPE_STRING, defines.MYSQL_TYPE_BLOB, defines.MYSQL_TYPE_TEXT: 462 value, err := oq.mrs.GetValue(oq.ctx, 0, i) 463 if err != nil { 464 return err 465 } 466 value = addEscapeToString(value.([]byte)) 467 if err = formatOutputString(oq, value.([]byte), symbol[i], closeby, true); err != nil { 468 return err 469 } 470 case defines.MYSQL_TYPE_DATE: 471 value, err := oq.mrs.GetValue(oq.ctx, 0, i) 472 if err != nil { 473 return err 474 } 475 if err = formatOutputString(oq, []byte(value.(types.Date).String()), symbol[i], closeby, flag[i]); err != nil { 476 return err 477 } 478 case defines.MYSQL_TYPE_TIME: 479 value, err := oq.mrs.GetValue(oq.ctx, 0, i) 480 if err != nil { 481 return err 482 } 483 if err = formatOutputString(oq, []byte(value.(types.Time).String()), symbol[i], closeby, flag[i]); err != nil { 484 return err 485 } 486 case defines.MYSQL_TYPE_DATETIME: 487 value, err := oq.mrs.GetValue(oq.ctx, 0, i) 488 if err != nil { 489 return err 490 } 491 if err = formatOutputString(oq, []byte(value.(string)), symbol[i], closeby, flag[i]); err != nil { 492 return err 493 } 494 case defines.MYSQL_TYPE_TIMESTAMP: 495 value, err := oq.mrs.GetString(oq.ctx, 0, i) 496 if err != nil { 497 return err 498 } 499 if err = formatOutputString(oq, []byte(value), symbol[i], closeby, flag[i]); err != nil { 500 return err 501 } 502 case defines.MYSQL_TYPE_JSON: 503 value, err := oq.mrs.GetValue(oq.ctx, 0, i) 504 if err != nil { 505 return err 506 } 507 jsonStr := value.(bytejson.ByteJson).String() 508 if err = formatOutputString(oq, []byte(jsonStr), symbol[i], closeby, flag[i]); err != nil { 509 return err 510 } 511 case defines.MYSQL_TYPE_UUID: 512 value, err := oq.mrs.GetString(oq.ctx, 0, i) 513 if err != nil { 514 return err 515 } 516 if err = formatOutputString(oq, []byte(value), symbol[i], closeby, flag[i]); err != nil { 517 return err 518 } 519 default: 520 return moerr.NewInternalError(oq.ctx, "unsupported column type %d ", mysqlColumn.ColumnType()) 521 } 522 } 523 oq.ep.Rows++ 524 _, err := EndOfLine(oq.ep) 525 return err 526 }