github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/mvdata/data_loc.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mvdata 16 17 import ( 18 "context" 19 "fmt" 20 "io" 21 "path/filepath" 22 "strings" 23 24 "github.com/dolthub/dolt/go/cmd/dolt/cli" 25 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 26 "github.com/dolthub/dolt/go/libraries/doltcore/env" 27 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 28 "github.com/dolthub/dolt/go/libraries/doltcore/table" 29 "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" 30 "github.com/dolthub/dolt/go/libraries/utils/filesys" 31 ) 32 33 // DataFormat is an enumeration of the valid data formats 34 type DataFormat string 35 36 const ( 37 // InvalidDataFormat is the format of a data lotacion that isn't valid 38 InvalidDataFormat DataFormat = "invalid" 39 40 // DoltDB is the format of a data location for a dolt table 41 DoltDB DataFormat = "doltdb" 42 43 // CsvFile is the format of a data location that is a .csv file 44 CsvFile DataFormat = ".csv" 45 46 // PsvFile is the format of a data location that is a .psv file 47 PsvFile DataFormat = ".psv" 48 49 // XlsxFile is the format of a data location that is a .xlsx file 50 XlsxFile DataFormat = ".xlsx" 51 52 // JsonFile is the format of a data location that is a json file 53 JsonFile DataFormat = ".json" 54 55 // SqlFile is the format of a data location that is a .sql file 56 SqlFile DataFormat = ".sql" 57 58 // ParquetFile is the format of a data location that is a .paquet file 59 ParquetFile DataFormat = ".parquet" 60 ) 61 62 // ReadableStr returns a human readable string for a DataFormat 63 func (df DataFormat) ReadableStr() string { 64 switch df { 65 case DoltDB: 66 return "dolt table" 67 case CsvFile: 68 return "csv file" 69 case PsvFile: 70 return "psv file" 71 case XlsxFile: 72 return "xlsx file" 73 case JsonFile: 74 return "json file" 75 case SqlFile: 76 return "sql file" 77 case ParquetFile: 78 return "parquet file" 79 default: 80 return "invalid" 81 } 82 } 83 84 // DataLocation is an interface that can be used to read or write from the source or the destination of a move operation. 85 type DataLocation interface { 86 fmt.Stringer 87 88 // Exists returns true if the DataLocation already exists 89 Exists(ctx context.Context, root doltdb.RootValue, fs filesys.ReadableFS) (bool, error) 90 91 // NewReader creates a TableReadCloser for the DataLocation 92 NewReader(ctx context.Context, dEnv *env.DoltEnv, opts interface{}) (rdCl table.SqlRowReader, sorted bool, err error) 93 94 // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite 95 // an existing table. 96 NewCreatingWriter(ctx context.Context, mvOpts DataMoverOptions, root doltdb.RootValue, outSch schema.Schema, opts editor.Options, wr io.WriteCloser) (table.SqlRowWriter, error) 97 } 98 99 // NewDataLocation creates a DataLocation object from a path and a format string. If the path is the name of a table 100 // then a TableDataLocation will be returned. If the path is empty a StreamDataLocation is returned. Otherwise a 101 // FileDataLocation is returned. For FileDataLocations and StreamDataLocations, if a file format is provided explicitly 102 // then it is used as the format, otherwise, when it can be, it is inferred from the path for files. Inference is based 103 // on the file's extension. 104 func NewDataLocation(path, fileFmtStr string) DataLocation { 105 dataFmt := DFFromString(fileFmtStr) 106 107 if len(path) == 0 { 108 return StreamDataLocation{Format: dataFmt, Reader: cli.InStream, Writer: cli.OutStream} 109 } else if fileFmtStr == "" { 110 switch strings.ToLower(filepath.Ext(path)) { 111 case string(CsvFile): 112 dataFmt = CsvFile 113 case string(PsvFile): 114 dataFmt = PsvFile 115 case string(XlsxFile): 116 dataFmt = XlsxFile 117 case string(JsonFile): 118 dataFmt = JsonFile 119 case string(SqlFile): 120 dataFmt = SqlFile 121 case string(ParquetFile): 122 dataFmt = ParquetFile 123 } 124 } 125 126 return FileDataLocation{path, dataFmt} 127 }