github.com/matrixorigin/matrixone@v0.7.0/pkg/frontend/export.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package frontend
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"context"
    21  	"fmt"
    22  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    23  	"golang.org/x/sync/errgroup"
    24  	"io"
    25  	"os"
    26  	"strconv"
    27  	"sync"
    28  
    29  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    30  	"github.com/matrixorigin/matrixone/pkg/container/bytejson"
    31  
    32  	"github.com/matrixorigin/matrixone/pkg/container/types"
    33  	"github.com/matrixorigin/matrixone/pkg/defines"
    34  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    35  )
    36  
// ExportParam carries the state of one export-to-CSV operation: the parsed
// export options (embedded *tree.ExportParam), the current output target and
// the size/row counters used to roll over to a new file when MaxFileSize is
// exceeded.
type ExportParam struct {
	*tree.ExportParam
	// File is the currently open local output file (used when UseFileService is false).
	File *os.File
	// Writer is the buffered writer wrapping File.
	Writer *bufio.Writer
	// CurFileSize is the number of bytes written to the current file so far;
	// it is reset to 0 whenever a new file is opened.
	CurFileSize uint64
	// Rows is incremented once per exported row and reset when a partial line
	// is carried over into a new file.
	Rows uint64
	// FileCnt is incremented on every Close and is used to derive the
	// numeric suffix of the next output file name.
	FileCnt uint
	// ColumnFlag marks, per column index, whether the column was listed in ForceQuote.
	ColumnFlag []bool
	// Symbol holds, per column index, the bytes written after the column
	// value: the field terminator, or the line terminator for the last column.
	Symbol [][]byte
	// DefaultBufSize is the size handed to bufio.NewWriterSize; it is
	// multiplied by 1024*1024 in initExportFileParam.
	DefaultBufSize int64
	// OutputStr holds the bytes of the partially written line that are
	// re-written at the start of a new file after a rollover.
	OutputStr []byte
	// LineSize is the number of bytes written for the current line.
	LineSize uint64

	// File service & buffer for the line.
	// UseFileService selects the FileService/pipe based output path instead of a local file.
	UseFileService bool
	// FileService is the destination the asynchronous writer streams to.
	FileService fileservice.FileService
	// LineBuffer accumulates the current line until EndOfLine pushes it into the pipe.
	LineBuffer *bytes.Buffer
	// Ctx is not referenced by the code in this file.
	// NOTE(review): presumably set and consumed by callers — confirm.
	Ctx context.Context
	// AsyncReader/AsyncWriter are the two ends of the io.Pipe connecting
	// EndOfLine to the background FileService.Write call.
	AsyncReader *io.PipeReader
	AsyncWriter *io.PipeWriter
	// AsyncGroup runs the background FileService.Write and is waited on in Close.
	AsyncGroup *errgroup.Group
}
    63  
// OpenFile is the function openNewFile uses to create local export files.
// It is a package-level variable, so it can be swapped for another
// implementation (presumably by tests — confirm).
var OpenFile = os.OpenFile

// escape is the byte that addEscapeToString duplicates inside string values.
var escape byte = '"'
    66  
    67  type CloseExportData struct {
    68  	stopExportData chan interface{}
    69  	onceClose      sync.Once
    70  }
    71  
    72  func NewCloseExportData() *CloseExportData {
    73  	return &CloseExportData{
    74  		stopExportData: make(chan interface{}),
    75  	}
    76  }
    77  
    78  func (cld *CloseExportData) Open() {
    79  }
    80  
    81  func (cld *CloseExportData) Close() {
    82  	cld.onceClose.Do(func() {
    83  		close(cld.stopExportData)
    84  	})
    85  }
    86  
    87  func initExportFileParam(ep *ExportParam, mrs *MysqlResultSet) {
    88  	ep.DefaultBufSize *= 1024 * 1024
    89  	n := (int)(mrs.GetColumnCount())
    90  	if n <= 0 {
    91  		return
    92  	}
    93  	ep.Symbol = make([][]byte, n)
    94  	for i := 0; i < n-1; i++ {
    95  		ep.Symbol[i] = []byte(ep.Fields.Terminated)
    96  	}
    97  	ep.Symbol[n-1] = []byte(ep.Lines.TerminatedBy)
    98  	ep.ColumnFlag = make([]bool, len(mrs.Name2Index))
    99  	for i := 0; i < len(ep.ForceQuote); i++ {
   100  		col, ok := mrs.Name2Index[ep.ForceQuote[i]]
   101  		if ok {
   102  			ep.ColumnFlag[col] = true
   103  		}
   104  	}
   105  }
   106  
   107  var openNewFile = func(ctx context.Context, ep *ExportParam, mrs *MysqlResultSet) error {
   108  	lineSize := ep.LineSize
   109  	var err error
   110  	ep.CurFileSize = 0
   111  	if !ep.UseFileService {
   112  		filePath := getExportFilePath(ep.FilePath, ep.FileCnt)
   113  		ep.File, err = OpenFile(filePath, os.O_RDWR|os.O_EXCL|os.O_CREATE, 0o666)
   114  		if err != nil {
   115  			return err
   116  		}
   117  		ep.Writer = bufio.NewWriterSize(ep.File, int(ep.DefaultBufSize))
   118  	} else {
   119  		//default 1MB
   120  		if ep.LineBuffer == nil {
   121  			ep.LineBuffer = &bytes.Buffer{}
   122  		} else {
   123  			ep.LineBuffer.Reset()
   124  		}
   125  		ep.AsyncReader, ep.AsyncWriter = io.Pipe()
   126  		filePath := getExportFilePath(ep.FilePath, ep.FileCnt)
   127  
   128  		asyncWriteFunc := func() error {
   129  			vec := fileservice.IOVector{
   130  				FilePath: filePath,
   131  				Entries: []fileservice.IOEntry{
   132  					{
   133  						ReaderForWrite: ep.AsyncReader,
   134  						Size:           -1,
   135  					},
   136  				},
   137  			}
   138  			err := ep.FileService.Write(ctx, vec)
   139  			if err != nil {
   140  				err2 := ep.AsyncReader.CloseWithError(err)
   141  				if err2 != nil {
   142  					return err2
   143  				}
   144  			}
   145  			return err
   146  		}
   147  
   148  		ep.AsyncGroup, _ = errgroup.WithContext(ctx)
   149  		ep.AsyncGroup.Go(asyncWriteFunc)
   150  	}
   151  	if ep.Header {
   152  		var header string
   153  		n := len(mrs.Columns)
   154  		if n == 0 {
   155  			return nil
   156  		}
   157  		for i := 0; i < n-1; i++ {
   158  			header += mrs.Columns[i].Name() + ep.Fields.Terminated
   159  		}
   160  		header += mrs.Columns[n-1].Name() + ep.Lines.TerminatedBy
   161  		if ep.MaxFileSize != 0 && uint64(len(header)) >= ep.MaxFileSize {
   162  			return moerr.NewInternalError(ctx, "the header line size is over the maxFileSize")
   163  		}
   164  		if err := writeDataToCSVFile(ep, []byte(header)); err != nil {
   165  			return err
   166  		}
   167  		if _, err := EndOfLine(ep); err != nil {
   168  			return err
   169  		}
   170  	}
   171  	if lineSize != 0 {
   172  		ep.LineSize = 0
   173  		ep.Rows = 0
   174  		if err := writeDataToCSVFile(ep, ep.OutputStr); err != nil {
   175  			return err
   176  		}
   177  	}
   178  	return nil
   179  }
   180  
   181  func getExportFilePath(filename string, fileCnt uint) string {
   182  	if fileCnt == 0 {
   183  		return filename
   184  	} else {
   185  		return fmt.Sprintf("%s.%d", filename, fileCnt)
   186  	}
   187  }
   188  
   189  var formatOutputString = func(oq *outputQueue, tmp, symbol []byte, enclosed byte, flag bool) error {
   190  	var err error
   191  	if flag {
   192  		if err = writeToCSVFile(oq, []byte{enclosed}); err != nil {
   193  			return err
   194  		}
   195  	}
   196  	if err = writeToCSVFile(oq, tmp); err != nil {
   197  		return err
   198  	}
   199  	if flag {
   200  		if err = writeToCSVFile(oq, []byte{enclosed}); err != nil {
   201  			return err
   202  		}
   203  	}
   204  	if err = writeToCSVFile(oq, symbol); err != nil {
   205  		return err
   206  	}
   207  	return nil
   208  }
   209  
   210  var Flush = func(ep *ExportParam) error {
   211  	if !ep.UseFileService {
   212  		return ep.Writer.Flush()
   213  	}
   214  	return nil
   215  }
   216  
   217  var Seek = func(ep *ExportParam) (int64, error) {
   218  	if !ep.UseFileService {
   219  		return ep.File.Seek(int64(ep.CurFileSize-ep.LineSize), io.SeekStart)
   220  	}
   221  	return 0, nil
   222  }
   223  
   224  var Read = func(ep *ExportParam) (int, error) {
   225  	if !ep.UseFileService {
   226  		ep.OutputStr = make([]byte, ep.LineSize)
   227  		return ep.File.Read(ep.OutputStr)
   228  	} else {
   229  		ep.OutputStr = make([]byte, ep.LineSize)
   230  		copy(ep.OutputStr, ep.LineBuffer.Bytes())
   231  		ep.LineBuffer.Reset()
   232  		return int(ep.LineSize), nil
   233  	}
   234  }
   235  
   236  var Truncate = func(ep *ExportParam) error {
   237  	if !ep.UseFileService {
   238  		return ep.File.Truncate(int64(ep.CurFileSize - ep.LineSize))
   239  	} else {
   240  		return nil
   241  	}
   242  }
   243  
   244  var Close = func(ep *ExportParam) error {
   245  	if !ep.UseFileService {
   246  		ep.FileCnt++
   247  		return ep.File.Close()
   248  	} else {
   249  		ep.FileCnt++
   250  		err := ep.AsyncWriter.Close()
   251  		if err != nil {
   252  			return err
   253  		}
   254  		err = ep.AsyncGroup.Wait()
   255  		if err != nil {
   256  			return err
   257  		}
   258  		err = ep.AsyncReader.Close()
   259  		if err != nil {
   260  			return err
   261  		}
   262  		ep.AsyncReader = nil
   263  		ep.AsyncWriter = nil
   264  		ep.AsyncGroup = nil
   265  		return err
   266  	}
   267  }
   268  
   269  var Write = func(ep *ExportParam, output []byte) (int, error) {
   270  	if !ep.UseFileService {
   271  		return ep.Writer.Write(output)
   272  	} else {
   273  		return ep.LineBuffer.Write(output)
   274  	}
   275  }
   276  
   277  var EndOfLine = func(ep *ExportParam) (int, error) {
   278  	if ep.UseFileService {
   279  		n, err := ep.AsyncWriter.Write(ep.LineBuffer.Bytes())
   280  		if err != nil {
   281  			err2 := ep.AsyncWriter.CloseWithError(err)
   282  			if err2 != nil {
   283  				return 0, err2
   284  			}
   285  		}
   286  		ep.LineBuffer.Reset()
   287  		return n, err
   288  	}
   289  	return 0, nil
   290  }
   291  
   292  func writeToCSVFile(oq *outputQueue, output []byte) error {
   293  	if oq.ep.MaxFileSize != 0 && oq.ep.CurFileSize+uint64(len(output)) > oq.ep.MaxFileSize {
   294  		if oq.ep.Rows == 0 {
   295  			return moerr.NewInternalError(oq.ctx, "the OneLine size is over the maxFileSize")
   296  		}
   297  
   298  		if err := Flush(oq.ep); err != nil {
   299  			return err
   300  		}
   301  		if oq.ep.LineSize != 0 {
   302  			if _, err := Seek(oq.ep); err != nil {
   303  				return err
   304  			}
   305  			for {
   306  				if n, err := Read(oq.ep); err != nil {
   307  					return err
   308  				} else if uint64(n) == oq.ep.LineSize {
   309  					break
   310  				}
   311  			}
   312  			if err := Truncate(oq.ep); err != nil {
   313  				return err
   314  			}
   315  		}
   316  		if err := Close(oq.ep); err != nil {
   317  			return err
   318  		}
   319  		if err := openNewFile(oq.ctx, oq.ep, oq.mrs); err != nil {
   320  			return err
   321  		}
   322  	}
   323  
   324  	if err := writeDataToCSVFile(oq.ep, output); err != nil {
   325  		return err
   326  	}
   327  	return nil
   328  }
   329  
   330  var writeDataToCSVFile = func(ep *ExportParam, output []byte) error {
   331  	for {
   332  		if n, err := Write(ep, output); err != nil {
   333  			return err
   334  		} else if n == len(output) {
   335  			break
   336  		}
   337  	}
   338  	ep.LineSize += uint64(len(output))
   339  	ep.CurFileSize += uint64(len(output))
   340  	return nil
   341  }
   342  
   343  func addEscapeToString(s []byte) []byte {
   344  	pos := make([]int, 0)
   345  	for i := 0; i < len(s); i++ {
   346  		if s[i] == escape {
   347  			pos = append(pos, i)
   348  		}
   349  	}
   350  	if len(pos) == 0 {
   351  		return s
   352  	}
   353  	ret := make([]byte, 0)
   354  	cur := 0
   355  	for i := 0; i < len(pos); i++ {
   356  		ret = append(ret, s[cur:pos[i]]...)
   357  		ret = append(ret, escape)
   358  		cur = pos[i]
   359  	}
   360  	ret = append(ret, s[cur:]...)
   361  	return ret
   362  }
   363  
   364  func exportDataToCSVFile(oq *outputQueue) error {
   365  	oq.ep.LineSize = 0
   366  
   367  	symbol := oq.ep.Symbol
   368  	closeby := oq.ep.Fields.EnclosedBy
   369  	flag := oq.ep.ColumnFlag
   370  	for i := uint64(0); i < oq.mrs.GetColumnCount(); i++ {
   371  		column, err := oq.mrs.GetColumn(oq.ctx, i)
   372  		if err != nil {
   373  			return err
   374  		}
   375  		mysqlColumn, ok := column.(*MysqlColumn)
   376  		if !ok {
   377  			return moerr.NewInternalError(oq.ctx, "sendColumn need MysqlColumn")
   378  		}
   379  		if isNil, err := oq.mrs.ColumnIsNull(oq.ctx, 0, i); err != nil {
   380  			return err
   381  		} else if isNil {
   382  			//NULL is output as \N
   383  			if err = formatOutputString(oq, []byte{'\\', 'N'}, symbol[i], closeby, false); err != nil {
   384  				return err
   385  			}
   386  			continue
   387  		}
   388  
   389  		switch mysqlColumn.ColumnType() {
   390  		case defines.MYSQL_TYPE_DECIMAL:
   391  			value, err := oq.mrs.GetString(oq.ctx, 0, i)
   392  			if err != nil {
   393  				return err
   394  			}
   395  			if err = formatOutputString(oq, []byte(value), symbol[i], closeby, flag[i]); err != nil {
   396  				return err
   397  			}
   398  		case defines.MYSQL_TYPE_BOOL:
   399  			value, err := oq.mrs.GetString(oq.ctx, 0, i)
   400  			if err != nil {
   401  				return err
   402  			}
   403  			if err = formatOutputString(oq, []byte(value), symbol[i], closeby, flag[i]); err != nil {
   404  				return err
   405  			}
   406  		case defines.MYSQL_TYPE_TINY, defines.MYSQL_TYPE_SHORT, defines.MYSQL_TYPE_INT24, defines.MYSQL_TYPE_LONG, defines.MYSQL_TYPE_YEAR:
   407  			value, err := oq.mrs.GetInt64(oq.ctx, 0, i)
   408  			if err != nil {
   409  				return err
   410  			}
   411  			if mysqlColumn.ColumnType() == defines.MYSQL_TYPE_YEAR {
   412  				if value == 0 {
   413  					if err = formatOutputString(oq, []byte("0000"), symbol[i], closeby, flag[i]); err != nil {
   414  						return err
   415  					}
   416  				} else {
   417  					oq.resetLineStr()
   418  					oq.lineStr = strconv.AppendInt(oq.lineStr, value, 10)
   419  					if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil {
   420  						return err
   421  					}
   422  				}
   423  			} else {
   424  				oq.resetLineStr()
   425  				oq.lineStr = strconv.AppendInt(oq.lineStr, value, 10)
   426  				if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil {
   427  					return err
   428  				}
   429  			}
   430  		case defines.MYSQL_TYPE_FLOAT, defines.MYSQL_TYPE_DOUBLE:
   431  			value, err := oq.mrs.GetFloat64(oq.ctx, 0, i)
   432  			if err != nil {
   433  				return err
   434  			}
   435  			oq.lineStr = []byte(fmt.Sprintf("%v", value))
   436  			if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil {
   437  				return err
   438  			}
   439  		case defines.MYSQL_TYPE_LONGLONG:
   440  			if uint32(mysqlColumn.Flag())&defines.UNSIGNED_FLAG != 0 {
   441  				if value, err := oq.mrs.GetUint64(oq.ctx, 0, i); err != nil {
   442  					return err
   443  				} else {
   444  					oq.resetLineStr()
   445  					oq.lineStr = strconv.AppendUint(oq.lineStr, value, 10)
   446  					if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil {
   447  						return err
   448  					}
   449  				}
   450  			} else {
   451  				if value, err := oq.mrs.GetInt64(oq.ctx, 0, i); err != nil {
   452  					return err
   453  				} else {
   454  					oq.resetLineStr()
   455  					oq.lineStr = strconv.AppendInt(oq.lineStr, value, 10)
   456  					if err = formatOutputString(oq, oq.lineStr, symbol[i], closeby, flag[i]); err != nil {
   457  						return err
   458  					}
   459  				}
   460  			}
   461  		case defines.MYSQL_TYPE_VARCHAR, defines.MYSQL_TYPE_VAR_STRING, defines.MYSQL_TYPE_STRING, defines.MYSQL_TYPE_BLOB, defines.MYSQL_TYPE_TEXT:
   462  			value, err := oq.mrs.GetValue(oq.ctx, 0, i)
   463  			if err != nil {
   464  				return err
   465  			}
   466  			value = addEscapeToString(value.([]byte))
   467  			if err = formatOutputString(oq, value.([]byte), symbol[i], closeby, true); err != nil {
   468  				return err
   469  			}
   470  		case defines.MYSQL_TYPE_DATE:
   471  			value, err := oq.mrs.GetValue(oq.ctx, 0, i)
   472  			if err != nil {
   473  				return err
   474  			}
   475  			if err = formatOutputString(oq, []byte(value.(types.Date).String()), symbol[i], closeby, flag[i]); err != nil {
   476  				return err
   477  			}
   478  		case defines.MYSQL_TYPE_TIME:
   479  			value, err := oq.mrs.GetValue(oq.ctx, 0, i)
   480  			if err != nil {
   481  				return err
   482  			}
   483  			if err = formatOutputString(oq, []byte(value.(types.Time).String()), symbol[i], closeby, flag[i]); err != nil {
   484  				return err
   485  			}
   486  		case defines.MYSQL_TYPE_DATETIME:
   487  			value, err := oq.mrs.GetValue(oq.ctx, 0, i)
   488  			if err != nil {
   489  				return err
   490  			}
   491  			if err = formatOutputString(oq, []byte(value.(string)), symbol[i], closeby, flag[i]); err != nil {
   492  				return err
   493  			}
   494  		case defines.MYSQL_TYPE_TIMESTAMP:
   495  			value, err := oq.mrs.GetString(oq.ctx, 0, i)
   496  			if err != nil {
   497  				return err
   498  			}
   499  			if err = formatOutputString(oq, []byte(value), symbol[i], closeby, flag[i]); err != nil {
   500  				return err
   501  			}
   502  		case defines.MYSQL_TYPE_JSON:
   503  			value, err := oq.mrs.GetValue(oq.ctx, 0, i)
   504  			if err != nil {
   505  				return err
   506  			}
   507  			jsonStr := value.(bytejson.ByteJson).String()
   508  			if err = formatOutputString(oq, []byte(jsonStr), symbol[i], closeby, flag[i]); err != nil {
   509  				return err
   510  			}
   511  		case defines.MYSQL_TYPE_UUID:
   512  			value, err := oq.mrs.GetString(oq.ctx, 0, i)
   513  			if err != nil {
   514  				return err
   515  			}
   516  			if err = formatOutputString(oq, []byte(value), symbol[i], closeby, flag[i]); err != nil {
   517  				return err
   518  			}
   519  		default:
   520  			return moerr.NewInternalError(oq.ctx, "unsupported column type %d ", mysqlColumn.ColumnType())
   521  		}
   522  	}
   523  	oq.ep.Rows++
   524  	_, err := EndOfLine(oq.ep)
   525  	return err
   526  }