github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/mvdata/file_data_loc.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package mvdata 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "io" 22 "os" 23 "strings" 24 25 "github.com/dolthub/dolt/go/libraries/doltcore/env" 26 27 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 28 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 29 "github.com/dolthub/dolt/go/libraries/doltcore/table" 30 "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" 31 "github.com/dolthub/dolt/go/libraries/doltcore/table/typed/json" 32 "github.com/dolthub/dolt/go/libraries/doltcore/table/typed/parquet" 33 "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/csv" 34 "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/sqlexport" 35 "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/xlsx" 36 "github.com/dolthub/dolt/go/libraries/utils/filesys" 37 ) 38 39 // DFFromString returns a data object from a string. 40 func DFFromString(dfStr string) DataFormat { 41 switch strings.ToLower(dfStr) { 42 case "csv", ".csv": 43 return CsvFile 44 case "psv", ".psv": 45 return PsvFile 46 case "xlsx", ".xlsx": 47 return XlsxFile 48 case "json", ".json": 49 return JsonFile 50 case "sql", ".sql": 51 return SqlFile 52 case "parquet", ".parquet": 53 return ParquetFile 54 default: 55 return InvalidDataFormat 56 } 57 } 58 59 // FileDataLocation is a file that that can be imported from or exported to. 60 type FileDataLocation struct { 61 // Path is the path of the file on the filesystem 62 Path string 63 64 // Format is the DataFormat of the file 65 Format DataFormat 66 } 67 68 // String returns a string representation of the data location. 69 func (dl FileDataLocation) String() string { 70 return dl.Format.ReadableStr() + ":" + dl.Path 71 } 72 73 // Exists returns true if the DataLocation already exists 74 func (dl FileDataLocation) Exists(ctx context.Context, root doltdb.RootValue, fs filesys.ReadableFS) (bool, error) { 75 exists, _ := fs.Exists(dl.Path) 76 return exists, nil 77 } 78 79 // NewReader creates a TableReadCloser for the DataLocation 80 func (dl FileDataLocation) NewReader(ctx context.Context, dEnv *env.DoltEnv, opts interface{}) (rdCl table.SqlRowReader, sorted bool, err error) { 81 fs := dEnv.FS 82 root, err := dEnv.WorkingRoot(ctx) 83 if err != nil { 84 return nil, false, err 85 } 86 87 exists, isDir := fs.Exists(dl.Path) 88 89 if !exists { 90 return nil, false, os.ErrNotExist 91 } else if isDir { 92 return nil, false, filesys.ErrIsDir 93 } 94 95 switch dl.Format { 96 case CsvFile: 97 delim := "," 98 99 if opts != nil { 100 csvOpts, _ := opts.(CsvOptions) 101 102 if len(csvOpts.Delim) != 0 { 103 delim = csvOpts.Delim 104 } 105 } 106 107 rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim(delim)) 108 109 return rd, false, err 110 111 case PsvFile: 112 rd, err := csv.OpenCSVReader(root.VRW().Format(), dl.Path, fs, csv.NewCSVInfo().SetDelim("|")) 113 return rd, false, err 114 115 case XlsxFile: 116 xlsxOpts := opts.(XlsxOptions) 117 rd, err := xlsx.OpenXLSXReader(ctx, root.VRW(), dl.Path, fs, &xlsx.XLSXFileInfo{SheetName: xlsxOpts.SheetName}) 118 return rd, false, err 119 120 case JsonFile: 121 var sch schema.Schema 122 jsonOpts, _ := opts.(JSONOptions) 123 if jsonOpts.SchFile != "" { 124 tn, s, err := SchAndTableNameFromFile(ctx, jsonOpts.SchFile, dEnv) 125 if err != nil { 126 return nil, false, err 127 } 128 if tn != jsonOpts.TableName { 129 return nil, false, fmt.Errorf("table name '%s' from schema file %s does not match table arg '%s'", tn, jsonOpts.SchFile, jsonOpts.TableName) 130 } 131 sch = s 132 } else { 133 if opts == nil { 134 return nil, false, errors.New("Unable to determine table name on JSON import") 135 } 136 tbl, exists, err := root.GetTable(context.TODO(), doltdb.TableName{Name: jsonOpts.TableName}) 137 if !exists { 138 return nil, false, fmt.Errorf("The following table could not be found:\n%v", jsonOpts.TableName) 139 } 140 if err != nil { 141 return nil, false, fmt.Errorf("An error occurred attempting to read the table:\n%v", err.Error()) 142 } 143 sch, err = tbl.GetSchema(context.TODO()) 144 if err != nil { 145 return nil, false, fmt.Errorf("An error occurred attempting to read the table schema:\n%v", err.Error()) 146 } 147 } 148 149 rd, err := json.OpenJSONReader(root.VRW(), dl.Path, fs, sch) 150 return rd, false, err 151 152 case ParquetFile: 153 var tableSch schema.Schema 154 parquetOpts, _ := opts.(ParquetOptions) 155 if parquetOpts.SchFile != "" { 156 tn, s, tnErr := SchAndTableNameFromFile(ctx, parquetOpts.SchFile, dEnv) 157 if tnErr != nil { 158 return nil, false, tnErr 159 } 160 if tn != parquetOpts.TableName { 161 return nil, false, fmt.Errorf("table name '%s' from schema file %s does not match table arg '%s'", tn, parquetOpts.SchFile, parquetOpts.TableName) 162 } 163 tableSch = s 164 } else { 165 if opts == nil { 166 return nil, false, errors.New("Unable to determine table name on JSON import") 167 } 168 tbl, tableExists, tErr := root.GetTable(context.TODO(), doltdb.TableName{Name: parquetOpts.TableName}) 169 if !tableExists { 170 return nil, false, fmt.Errorf("The following table could not be found:\n%v", parquetOpts.TableName) 171 } 172 if tErr != nil { 173 return nil, false, fmt.Errorf("An error occurred attempting to read the table:\n%v", err.Error()) 174 } 175 tableSch, err = tbl.GetSchema(context.TODO()) 176 if err != nil { 177 return nil, false, fmt.Errorf("An error occurred attempting to read the table schema:\n%v", err.Error()) 178 } 179 } 180 rd, rErr := parquet.OpenParquetReader(root.VRW(), dl.Path, tableSch) 181 return rd, false, rErr 182 } 183 184 return nil, false, errors.New("unsupported format") 185 } 186 187 // NewCreatingWriter will create a TableWriteCloser for a DataLocation that will create a new table, or overwrite 188 // an existing table. 189 func (dl FileDataLocation) NewCreatingWriter(ctx context.Context, mvOpts DataMoverOptions, root doltdb.RootValue, outSch schema.Schema, opts editor.Options, wr io.WriteCloser) (table.SqlRowWriter, error) { 190 switch dl.Format { 191 case CsvFile: 192 return csv.NewCSVWriter(wr, outSch, csv.NewCSVInfo()) 193 case PsvFile: 194 return csv.NewCSVWriter(wr, outSch, csv.NewCSVInfo().SetDelim("|")) 195 case XlsxFile: 196 panic("writing to xlsx files is not supported yet") 197 case JsonFile: 198 return json.NewJSONWriter(wr, outSch) 199 case SqlFile: 200 if mvOpts.IsBatched() { 201 return sqlexport.OpenBatchedSQLExportWriter(ctx, wr, root, mvOpts.SrcName(), mvOpts.IsAutocommitOff(), outSch, opts) 202 } else { 203 return sqlexport.OpenSQLExportWriter(ctx, wr, root, mvOpts.SrcName(), mvOpts.IsAutocommitOff(), outSch, opts) 204 } 205 case ParquetFile: 206 return parquet.NewParquetRowWriterForFile(outSch, mvOpts.DestName()) 207 } 208 209 panic("Invalid Data Format." + string(dl.Format)) 210 }